rucio-clients 37.4.0__py3-none-any.whl → 37.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of rucio-clients might be problematic.

Files changed (38)
  1. rucio/cli/bin_legacy/rucio.py +1 -1
  2. rucio/cli/bin_legacy/rucio_admin.py +1 -1
  3. rucio/cli/did.py +2 -2
  4. rucio/cli/rse.py +2 -3
  5. rucio/cli/subscription.py +1 -1
  6. rucio/client/baseclient.py +5 -1
  7. rucio/client/didclient.py +16 -16
  8. rucio/client/downloadclient.py +15 -15
  9. rucio/client/lockclient.py +3 -3
  10. rucio/client/replicaclient.py +2 -2
  11. rucio/client/requestclient.py +6 -5
  12. rucio/client/touchclient.py +1 -1
  13. rucio/client/uploadclient.py +725 -181
  14. rucio/common/config.py +1 -2
  15. rucio/common/constants.py +16 -17
  16. rucio/common/didtype.py +2 -2
  17. rucio/common/pcache.py +20 -25
  18. rucio/common/plugins.py +10 -17
  19. rucio/common/schema/__init__.py +7 -5
  20. rucio/common/utils.py +19 -3
  21. rucio/rse/protocols/ngarc.py +2 -2
  22. rucio/rse/protocols/srm.py +1 -1
  23. rucio/rse/protocols/webdav.py +8 -1
  24. rucio/rse/rsemanager.py +2 -2
  25. rucio/vcsversion.py +3 -3
  26. {rucio_clients-37.4.0.dist-info → rucio_clients-37.6.0.dist-info}/METADATA +1 -1
  27. {rucio_clients-37.4.0.dist-info → rucio_clients-37.6.0.dist-info}/RECORD +38 -38
  28. {rucio_clients-37.4.0.dist-info → rucio_clients-37.6.0.dist-info}/WHEEL +1 -1
  29. {rucio_clients-37.4.0.data → rucio_clients-37.6.0.data}/data/etc/rse-accounts.cfg.template +0 -0
  30. {rucio_clients-37.4.0.data → rucio_clients-37.6.0.data}/data/etc/rucio.cfg.atlas.client.template +0 -0
  31. {rucio_clients-37.4.0.data → rucio_clients-37.6.0.data}/data/etc/rucio.cfg.template +0 -0
  32. {rucio_clients-37.4.0.data → rucio_clients-37.6.0.data}/data/requirements.client.txt +0 -0
  33. {rucio_clients-37.4.0.data → rucio_clients-37.6.0.data}/data/rucio_client/merge_rucio_configs.py +0 -0
  34. {rucio_clients-37.4.0.data → rucio_clients-37.6.0.data}/scripts/rucio +0 -0
  35. {rucio_clients-37.4.0.data → rucio_clients-37.6.0.data}/scripts/rucio-admin +0 -0
  36. {rucio_clients-37.4.0.dist-info → rucio_clients-37.6.0.dist-info}/licenses/AUTHORS.rst +0 -0
  37. {rucio_clients-37.4.0.dist-info → rucio_clients-37.6.0.dist-info}/licenses/LICENSE +0 -0
  38. {rucio_clients-37.4.0.dist-info → rucio_clients-37.6.0.dist-info}/top_level.txt +0 -0
@@ -56,18 +56,32 @@ if TYPE_CHECKING:
56
56
 
57
57
 
58
58
  class UploadClient:
59
-
60
59
  def __init__(
61
- self,
62
- _client: Optional[Client] = None,
63
- logger: Optional["LoggerFunction"] = None,
64
- tracing: bool = True
60
+ self,
61
+ _client: Optional[Client] = None,
62
+ logger: Optional["LoggerFunction"] = None,
63
+ tracing: bool = True
65
64
  ):
66
65
  """
67
- Initialises the basic settings for an UploadClient object
68
-
69
- :param _client: - Optional: rucio.client.client.Client object. If None, a new object will be created.
70
- :param logger: - Optional: logging.Logger object. If None, default logger will be used.
66
+ Initialize the UploadClient with the necessary configuration to manage file uploads.
67
+
68
+ This method is used to create a new UploadClient instance that can upload files. It
69
+ allows the use of an existing Rucio Client, a custom logger, and tracing for debug
70
+ information during the upload process.
71
+
72
+ Parameters
73
+ ----------
74
+ _client
75
+ An existing Rucio `Client` instance to reuse. If not provided, a new one is created.
76
+ logger
77
+ A logger function. If not provided, the default Python logger is used.
78
+ tracing
79
+ Indicates whether to enable tracing to capture upload activity details.
80
+
81
+ Raises
82
+ ------
83
+ InputValidationError
84
+ If the client account is not found or is invalid, preventing upload setup.
71
85
  """
72
86
  if not logger:
73
87
  self.logger = logging.log
@@ -77,7 +91,9 @@ class UploadClient:
77
91
  self.client: Final[Client] = _client if _client else Client()
78
92
  self.client_location = detect_client_location()
79
93
  # if token should be used, use only JWT tokens
80
- self.auth_token: Optional[str] = self.client.auth_token if len(self.client.auth_token.split(".")) == 3 else None
94
+ self.auth_token: Optional[str] = (
95
+ self.client.auth_token if len(self.client.auth_token.split(".")) == 3 else None
96
+ )
81
97
  self.tracing = tracing
82
98
  if not self.tracing:
83
99
  logger(logging.DEBUG, 'Tracing is turned off.')
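
The auth-token handling in this hunk keeps the token only when it structurally looks like a JWT. A minimal illustration of that heuristic; the token strings below are made up for the example:

```python
# Only tokens with the classic three dot-separated JWT segments
# (header.payload.signature) are kept as auth_token; anything else becomes None.
def looks_like_jwt(token: str) -> bool:
    return len(token.split(".")) == 3

assert looks_like_jwt("eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhbGljZSJ9.c2ln")
assert not looks_like_jwt("x509-proxy-style-token")
```
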
@@ -86,10 +102,14 @@ class UploadClient:
86
102
  try:
87
103
  acc = self.client.whoami()
88
104
  if acc is None:
89
- raise InputValidationError('account not specified and rucio has no account with your identity')
105
+ raise InputValidationError(
106
+ 'Account not specified and rucio has no account with your identity'
107
+ )
90
108
  self.client.account = acc['account']
91
109
  except RucioException as e:
92
- raise InputValidationError('account not specified and problem with rucio: %s' % e)
110
+ raise InputValidationError(
111
+ f'Account not specified and problem with rucio: {e}'
112
+ )
93
113
  self.logger(logging.DEBUG, 'Discovered account as "%s"' % self.client.account)
94
114
  self.default_file_scope: Final[str] = 'user.' + self.client.account
95
115
  self.rses = {}
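
For orientation, a minimal construction sketch based on the defaults described in the docstring above; it assumes a working client environment (a valid rucio.cfg plus credentials) so that `Client()` and the `whoami()` account discovery succeed:

```python
import logging

from rucio.client import Client
from rucio.client.uploadclient import UploadClient

# Reuse an existing Client (otherwise UploadClient creates one itself) and pass a
# logging.log-compatible callable, mirroring the documented defaults; tracing is
# disabled here purely for local experimentation.
client = Client()
upload_client = UploadClient(_client=client, logger=logging.log, tracing=False)
```
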
@@ -112,34 +132,204 @@ class UploadClient:
112
132
  activity: Optional[str] = None
113
133
  ) -> int:
114
134
  """
115
- :param items: List of dictionaries. Each dictionary describing a file to upload. Keys:
116
- path - path of the file that will be uploaded
117
- rse - rse expression/name (e.g. 'CERN-PROD_DATADISK') where to upload the file
118
- did_scope - Optional: custom did scope (Default: user.<account>)
119
- did_name - Optional: custom did name (Default: name of the file)
120
- dataset_scope - Optional: custom dataset scope
121
- dataset_name - Optional: custom dataset name
122
- dataset_meta - Optional: custom metadata for dataset
123
- impl - Optional: name of the protocol implementation to be used to upload this item.
124
- force_scheme - Optional: force a specific scheme (if PFN upload this will be overwritten) (Default: None)
125
- pfn - Optional: use a given PFN (this sets no_register to True, and no_register becomes mandatory)
126
- no_register - Optional: if True, the file will not be registered in the rucio catalogue
127
- register_after_upload - Optional: if True, the file will be registered after successful upload
128
- lifetime - Optional: the lifetime of the file after it was uploaded
129
- transfer_timeout - Optional: time after the upload will be aborted
130
- guid - Optional: guid of the file
131
- recursive - Optional: if set, parses the folder structure recursively into collections
132
- :param summary_file_path: Optional: a path where a summary in form of a json file will be stored
133
- :param traces_copy_out: reference to an external list, where the traces should be uploaded
134
- :param ignore_availability: ignore the availability of a RSE
135
- :param activity: the activity set to the rule if no dataset is specified
136
-
137
- :returns: 0 on success
138
-
139
- :raises InputValidationError: if any input arguments are in a wrong format
140
- :raises RSEWriteBlocked: if a given RSE is not available for writing
141
- :raises NoFilesUploaded: if no files were successfully uploaded
142
- :raises NotAllFilesUploaded: if not all files were successfully uploaded
135
+ Uploads one or more files to an RSE (Rucio Storage Element) and optionally registers them.
136
+
137
+ An overview of this method's performed actions:
138
+ 1. Collects and validates file info from the passed `items` (directories may be
139
+ also included), ensuring valid paths exist on the local filesystem. If an RSE
140
+ expression is provided, a single RSE is picked at random from it.
141
+
142
+ 2. Checks the RSE's availability for writing (unless `ignore_availability` is True).
143
+
144
+ 3. Optionally registers each file in the Rucio Catalog, handling the DID creation,
145
+ dataset creation/attachment, and replication rules as needed.
146
+
147
+ 4. Uploads the files using the underlying protocol handlers and verifies checksums
148
+ if desired/possible. Partial or failed uploads raise exceptions.
149
+
150
+ 5. (Optional) Produces a JSON summary file at `summary_file_path`, listing the final
151
+ PFNs, checksums, and other info for all successfully uploaded files.
152
+
153
+ Parameters
154
+ ----------
155
+ items
156
+ A sequence of dictionaries, each describing a file to upload (or a
157
+ directory to be scanned). For each item, the supported keys are:
158
+
159
+ * **`path`** (PathTypeAlias, required):
160
+ The local path to the file or directory. If this is a directory and
161
+ `recursive` is True, the directory (and its subdirectories) are traversed.
162
+
163
+ * **`rse`** (str, required):
164
+ The target RSE or an RSE expression where the upload should be placed. If
165
+ an expression is provided (e.g., "tier=1"), one RSE from that expression
166
+ is chosen randomly.
167
+
168
+ * **`did_scope`** (str, not required):
169
+ The Rucio scope in which to register the file DID. Defaults to `user.<account>`.
170
+
171
+ * **`did_name`** (str, not required):
172
+ The logical filename in Rucio. Defaults to the local basename if not provided.
173
+
174
+ * **`lifetime`** (int, not required):
175
+ The lifetime (in seconds) to apply when creating a new replication rule.
176
+ For file uploads without a dataset, a new rule with that lifetime is created
177
+ if the file DID does not already exist in Rucio. For a new dataset, the
178
+ dataset is created with a rule using this lifetime, but if the dataset
179
+ already exists and you specify a lifetime, an error is raised.
180
+
181
+ _**Note:**_ **`lifetime`** is not automatically applied to nested containers
182
+ or datasets in recursive mode.
183
+
184
+ * **`impl`** (str, not required):
185
+ Name of the protocol implementation to be used for uploading this item.
186
+ For example, `"rucio.rse.protocols.gfal.Default"`.
187
+
188
+ * **`pfn`** (str, not required):
189
+ Allows you to explicitly set the Physical File Name (PFN) for the upload,
190
+ determining exactly where the file is placed on the storage. However, for
191
+ deterministic RSEs, specifying a PFN causes the client to skip registering
192
+ the file under the usual deterministic scheme. For non-deterministic RSEs,
193
+ you can still force the file to be registered in the Rucio catalog after
194
+ being uploaded, using `no_register=False` along with `register_after_upload=True`
195
+ (or by manually handling the registration later).
196
+
197
+ * **`force_scheme`** (str, not required):
198
+ Enforces the use of a specific protocol scheme (e.g., davs, https) during
199
+ file uploads. If the selected protocol is not compatible, the upload will
200
+ stop and raise an error instead of falling back to any other scheme.
201
+
202
+ * **`transfer_timeout`** (int, not required):
203
+ A maximum duration (in seconds) to wait for each individual file transfer
204
+ to complete. If the file transfer does not finish before this timeout
205
+ elapses, the operation will be aborted and retried one last time. When
206
+ transfer_timeout is None, no specific timeout is enforced, and the transfer
207
+ may continue until it completes or fails for another reason.
208
+
209
+ * **`guid`** (str, not required):
210
+ If provided, Rucio will use this GUID. If not provided and the file is
211
+ “pool.root” with `no_register` unset, Rucio tries to extract the GUID via
212
+ `pool_extractFileIdentifier`, raising an error if that fails. Otherwise, a
213
+ random GUID will be generated.
214
+
215
+ * **`no_register`** (bool, not required, default=False):
216
+ If set to True, the file is not registered in the Rucio Catalog, i.e., there
217
+ is no DID creation, no replica entry, and no rules. This is appropriate if
218
+ you plan to register the replica or create rules separately.
219
+
220
+ _**Note:**_ If **`recursive`**=True, the method still creates datasets
221
+ and/or containers for the directories when needed.
222
+
223
+ * **`register_after_upload`** (bool, not required, default=False):
224
+ If set to True, the file is uploaded first, and only then is the DID created
225
+ or updated in the Catalog. This can be useful when you want the actual data
226
+ on storage before finalizing the registration. By default (False), the file
227
+ is registered in Rucio before the physical upload if `no_register` is False.
228
+
229
+ * **`recursive`** (bool, not required, default=False):
230
+ If set to `True`, the method treats the specified path as a directory and
231
+ (depending on the combination with other parameters) recursively traverses
232
+ its subdirectories, mapping them into container/dataset hierarchies. Single
233
+ top-level file paths are ignored, but individual files found in subdirectories
234
+ are processed. Empty directories or non-existent paths also produce a warning.
235
+ If `False`, only top-level file paths or the direct child files of the
236
+ given top-level directory are processed (subdirectories are ignored,
237
+ and no container structure is created).
238
+
239
+ * **`dataset_scope`** / **`dataset_name`** (str, not required):
240
+ To register uploaded files into a dataset DID, you need to specify both
241
+ dataset_name and dataset_scope. With no_register=False, the client ensures
242
+ {dataset_scope}:{dataset_name} exists (creating it with a replication rule
243
+ if it doesn't), or simply attaching new files if it does. If the dataset
244
+ already exists and you specify a new lifetime, or if a checksum mismatch
245
+ is detected, registration fails. In non-recursive mode, only files in the
246
+ top-level directory are attached to the dataset and subdirectories are
247
+ skipped with a warning. In recursive mode, the client aims to create
248
+ containers for directories containing only subdirectories and datasets for
249
+ directories containing only files (raising an error if the top-level folder
250
+ mixes files and directories). If the top-level directory has subdirectories,
251
+ the user-supplied dataset_name is effectively ignored at that level (each
252
+ subdirectory becomes its own dataset or container); if there are no
253
+ subdirectories, the entire folder is registered as a single dataset.
254
+
255
+ * **`dataset_meta`** (dict, not required):
256
+ Additional metadata (e.g., `{'project': 'myProject'}`) to attach to the
257
+ newly created dataset when: the dataset does not already exist, `recursive=False`,
258
+ `no_register=False` and both `dataset_scope` and `dataset_name` are provided.
259
+
260
+ _**Note:**_ If multiple files share the same `dataset_scope` and `dataset_name`,
261
+ then if a dataset is created, it considers only the first item’s dataset_meta.
262
+ summary_file_path
263
+ If specified, a JSON file is created with a summary of each successfully
264
+ uploaded file, including checksum, PFN, scope, and name entries.
265
+ traces_copy_out
266
+ A list reference for collecting the trace dictionaries that Rucio generates
267
+ while iterating over each file. A new trace dictionary is appended to this list
268
+ for each file considered (even those ultimately skipped or already on the RSE).
269
+ ignore_availability
270
+ If set to True, the RSE's "write availability" is not enforced. By default,
271
+ this is False, and an RSE marked as unavailable for writing will raise an error.
272
+ activity
273
+ If you are uploading files without a parent dataset, this string sets the “activity”
274
+ on the replication rule that Rucio creates for each file (e.g., "Analysis"),
275
+ which can affect RSE queue priorities.
276
+
277
+ _**Note:**_ If your files are uploaded into a dataset, the dataset’s replication
278
+ rule does not use this activity parameter.
279
+
280
+ Returns
281
+ -------
282
+ int
283
+ Status code (``0`` if all files were uploaded successfully).
284
+
285
+ Raises
286
+ ------
287
+ NoFilesUploaded
288
+ Raised if none of the requested files could be uploaded.
289
+ NotAllFilesUploaded
290
+ Raised if some files were successfully uploaded, but others failed.
291
+ RSEWriteBlocked
292
+ Raised if `ignore_availability=False` but the chosen RSE does not allow writing.
293
+ InputValidationError
294
+ Raised if mandatory fields are missing, if conflicting DIDs are found,
295
+ or if no valid files remain after input parsing.
296
+
297
+ Examples
298
+ --------
299
+ ??? Example
300
+
301
+ Upload a single local file to the *CERN-PROD* RSE and write a JSON summary to
302
+ ``upload_summary.json``:
303
+
304
+ ```python
305
+ from rucio.client.uploadclient import UploadClient
306
+ upload_client = UploadClient()
307
+ items = [
308
+ {"path": "/data/file1.txt",
309
+ "rse": "CERN-PROD", # target RSE
310
+ "did_scope": "user.alice", # optional; defaults to user.<account>
311
+ "did_name": "file1.txt"} # optional; defaults to basename
312
+ ]
313
+ upload_client.upload(items, summary_file_path="upload_summary.json")
314
+ ```
315
+
316
+ Recursively upload every file found under ``/data/dataset`` into a new
317
+ dataset ``user.alice:mydataset`` on a random RSE that matches the
318
+ expression ``tier=1``; collect per-file *trace* dictionaries for later
319
+ inspection:
320
+
321
+ ```python
322
+ traces: list[TraceBaseDict] = []
323
+ dir_item = {
324
+ "path": "/data/dataset",
325
+ "rse": "tier=1", # RSE expression; one will be chosen
326
+ "recursive": True,
327
+ "dataset_scope": "user.alice",
328
+ "dataset_name": "mydataset",
329
+ "dataset_meta": {"project": "demo"},
330
+ }
331
+ upload_client.upload([dir_item], traces_copy_out=traces)
332
+ ```
143
333
  """
144
334
  # helper to get rse from rse_expression:
145
335
  def _pick_random_rse(rse_expression: str) -> dict[str, Any]:
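
As a complement to the examples embedded in the docstring above, a hedged sketch of handling the documented failure modes and then reading the JSON summary. The `rucio.common.exception` import path is assumed (it is where Rucio normally keeps these exception classes); the file path and RSE name are reused from the docstring's own example:

```python
import json

from rucio.client.uploadclient import UploadClient
from rucio.common.exception import NoFilesUploaded, NotAllFilesUploaded  # assumed module path

upload_client = UploadClient()
items = [{"path": "/data/file1.txt", "rse": "CERN-PROD"}]

try:
    upload_client.upload(items, summary_file_path="upload_summary.json")
except NoFilesUploaded:
    print("none of the requested files could be uploaded")
except NotAllFilesUploaded:
    print("some files were uploaded, others failed; check the client log")
else:
    # On success the summary lists PFN, checksum, scope and name per uploaded file.
    with open("upload_summary.json") as summary:
        print(json.load(summary))
```
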
@@ -198,7 +388,7 @@ class UploadClient:
198
388
  delete_existing = False
199
389
 
200
390
  trace = copy.deepcopy(self.trace)
201
- # appending trace to list reference, if the reference exists
391
+ # appending trace to the list reference if the reference exists
202
392
  if traces_copy_out is not None:
203
393
  traces_copy_out.append(trace)
204
394
 
@@ -218,7 +408,8 @@ class UploadClient:
218
408
  logger(logging.ERROR, 'PFN has to be defined for NON-DETERMINISTIC RSE.')
219
409
  continue
220
410
  if pfn and is_deterministic:
221
- logger(logging.WARNING, 'Upload with given pfn implies that no_register is True, except non-deterministic RSEs')
411
+ logger(logging.WARNING,
412
+ 'Upload with given pfn implies that no_register is True, except non-deterministic RSEs')
222
413
  no_register = True
223
414
 
224
415
  # resolving local area networks
@@ -228,7 +419,7 @@ class UploadClient:
228
419
  rse_attributes = self.client.list_rse_attributes(rse)
229
420
  except:
230
421
  logger(logging.WARNING, 'Attributes of the RSE: %s not available.' % rse)
231
- if (self.client_location and 'lan' in rse_settings['domain'] and RseAttr.SITE in rse_attributes):
422
+ if self.client_location and 'lan' in rse_settings['domain'] and RseAttr.SITE in rse_attributes:
232
423
  if self.client_location['site'] == rse_attributes[RseAttr.SITE]:
233
424
  domain = 'lan'
234
425
  logger(logging.DEBUG, '{} domain is used for the upload'.format(domain))
@@ -240,12 +431,22 @@ class UploadClient:
240
431
  # impl = self.preferred_impl(rse_settings, domain)
241
432
 
242
433
  if not no_register and not register_after_upload:
243
- self._register_file(file, registered_dataset_dids, ignore_availability=ignore_availability, activity=activity)
434
+ self._register_file(file,
435
+ registered_dataset_dids,
436
+ ignore_availability=ignore_availability,
437
+ activity=activity)
244
438
 
245
- # if register_after_upload, file should be overwritten if it is not registered
246
- # otherwise if file already exists on RSE we're done
439
+ # if register_after_upload, the file should be overwritten if it is not registered,
440
+ # otherwise if the file already exists on RSE we're done
247
441
  if register_after_upload:
248
- if rsemgr.exists(rse_settings, pfn if pfn else file_did, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger): # type: ignore (pfn is str)
442
+ if rsemgr.exists(rse_settings,
443
+ pfn if pfn else file_did, # type: ignore (pfn is str)
444
+ domain=domain,
445
+ scheme=force_scheme,
446
+ impl=impl,
447
+ auth_token=self.auth_token,
448
+ vo=self.client.vo,
449
+ logger=logger):
249
450
  try:
250
451
  self.client.get_did(file['did_scope'], file['did_name'])
251
452
  logger(logging.INFO, 'File already registered. Skipping upload.')
@@ -255,22 +456,48 @@ class UploadClient:
255
456
  logger(logging.INFO, 'File already exists on RSE. Previous left overs will be overwritten.')
256
457
  delete_existing = True
257
458
  elif not is_deterministic and not no_register:
258
- if rsemgr.exists(rse_settings, pfn, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger): # type: ignore (pfn is str)
259
- logger(logging.INFO, 'File already exists on RSE with given pfn. Skipping upload. Existing replica has to be removed first.')
459
+ if rsemgr.exists(rse_settings,
460
+ pfn, # type: ignore (pfn is str)
461
+ domain=domain,
462
+ scheme=force_scheme,
463
+ impl=impl,
464
+ auth_token=self.auth_token,
465
+ vo=self.client.vo,
466
+ logger=logger):
467
+ logger(logging.INFO,
468
+ 'File already exists on RSE with given pfn. Skipping upload. Existing replica has to be removed first.')
260
469
  trace['stateReason'] = 'File already exists'
261
470
  continue
262
- elif rsemgr.exists(rse_settings, file_did, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger):
471
+ elif rsemgr.exists(rse_settings,
472
+ file_did,
473
+ domain=domain,
474
+ scheme=force_scheme,
475
+ impl=impl,
476
+ auth_token=self.auth_token,
477
+ vo=self.client.vo,
478
+ logger=logger):
263
479
  logger(logging.INFO, 'File already exists on RSE with different pfn. Skipping upload.')
264
480
  trace['stateReason'] = 'File already exists'
265
481
  continue
266
482
  else:
267
- if rsemgr.exists(rse_settings, pfn if pfn else file_did, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger): # type: ignore (pfn is str)
483
+ if rsemgr.exists(rse_settings,
484
+ pfn if pfn else file_did, # type: ignore (pfn is str)
485
+ domain=domain,
486
+ scheme=force_scheme,
487
+ impl=impl,
488
+ auth_token=self.auth_token,
489
+ vo=self.client.vo,
490
+ logger=logger):
268
491
  logger(logging.INFO, 'File already exists on RSE. Skipping upload')
269
492
  trace['stateReason'] = 'File already exists'
270
493
  continue
271
494
 
272
495
  # protocol handling and upload
273
- protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=force_scheme, domain=domain, impl=impl)
496
+ protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings,
497
+ operation='write',
498
+ scheme=force_scheme,
499
+ domain=domain,
500
+ impl=impl)
274
501
  protocols.reverse()
275
502
  success = False
276
503
  state_reason = ''
@@ -279,11 +506,9 @@ class UploadClient:
279
506
  protocol = protocols.pop()
280
507
  cur_scheme = protocol['scheme']
281
508
  logger(logging.INFO, 'Trying upload with %s to %s' % (cur_scheme, rse))
282
- lfn: "LFNDict" = {
283
- 'name': file['did_name'],
284
- 'scope': file['did_scope']
285
- }
286
- lfn['filename'] = basename
509
+ lfn: "LFNDict" = {'name': file['did_name'],
510
+ 'scope': file['did_scope'],
511
+ 'filename': basename}
287
512
 
288
513
  for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS:
289
514
  if checksum_name in file:
@@ -313,7 +538,10 @@ class UploadClient:
313
538
  logger(logging.DEBUG, 'Upload done.')
314
539
  success = True
315
540
  file['upload_result'] = {0: True, 1: None, 'success': True, 'pfn': pfn} # TODO: needs to be removed
316
- except (ServiceUnavailable, ResourceTemporaryUnavailable, RSEOperationNotSupported, RucioException) as error:
541
+ except (ServiceUnavailable,
542
+ ResourceTemporaryUnavailable,
543
+ RSEOperationNotSupported,
544
+ RucioException) as error:
317
545
  logger(logging.WARNING, 'Upload attempt failed')
318
546
  logger(logging.INFO, 'Exception: %s' % str(error), exc_info=True)
319
547
  state_reason = str(error)
@@ -331,7 +559,10 @@ class UploadClient:
331
559
  registration_succeeded = True
332
560
  if not no_register:
333
561
  if register_after_upload:
334
- self._register_file(file, registered_dataset_dids, ignore_availability=ignore_availability, activity=activity)
562
+ self._register_file(file,
563
+ registered_dataset_dids,
564
+ ignore_availability=ignore_availability,
565
+ activity=activity)
335
566
  else:
336
567
  replica_for_api = self._convert_file_for_api(file)
337
568
  try:
@@ -341,10 +572,13 @@ class UploadClient:
341
572
  logger(logging.ERROR, 'Failed to update replica state for file {}'.format(basename))
342
573
  logger(logging.DEBUG, 'Details: {}'.format(str(error)))
343
574
 
344
- # add file to dataset if needed
575
+ # add the file to dataset if needed
345
576
  if dataset_did_str and not no_register:
346
577
  try:
347
- self.client.attach_dids(file['dataset_scope'], file['dataset_name'], [file_did]) # type: ignore (`dataset_scope` and `dataset_name` always exist if `dataset_did_str`)
578
+ self.client.attach_dids(
579
+ file['dataset_scope'], # type: ignore (`dataset_scope` always exists if `dataset_did_str`)
580
+ file['dataset_name'], # type: ignore (`dataset_name` always exists if `dataset_did_str`)
581
+ [file_did])
348
582
  except Exception as error:
349
583
  registration_succeeded = False
350
584
  logger(logging.ERROR, 'Failed to attach file to the dataset')
@@ -386,15 +620,31 @@ class UploadClient:
386
620
  raise NotAllFilesUploaded()
387
621
  return 0
388
622
 
389
- def _add_bittorrent_meta(self, file: "Mapping[str, Any]") -> None:
390
- pieces_root, pieces_layers, piece_length = bittorrent_v2_merkle_sha256(os.path.join(file['dirname'], file['basename']))
623
+ def _add_bittorrent_meta(
624
+ self,
625
+ file: "Mapping[str, Any]"
626
+ ) -> None:
627
+ """
628
+ Add BitTorrent v2 metadata to the file DID.
629
+
630
+ This method calculates the BitTorrent v2 pieces root, layers, and piece length for
631
+ the specified local file, and updates the file DID's metadata with these values.
632
+
633
+ Parameters
634
+ ----------
635
+ file
636
+ A dictionary that must include 'dirname', 'basename', 'did_scope',
637
+ and 'did_name', describing the file path and the associated DID.
638
+ """
639
+ pieces_root, pieces_layers, piece_length = bittorrent_v2_merkle_sha256(
640
+ os.path.join(file['dirname'], file['basename']))
391
641
  bittorrent_meta = {
392
642
  'bittorrent_pieces_root': base64.b64encode(pieces_root).decode(),
393
643
  'bittorrent_pieces_layers': base64.b64encode(pieces_layers).decode(),
394
644
  'bittorrent_piece_length': piece_length,
395
645
  }
396
646
  self.client.set_metadata_bulk(scope=file['did_scope'], name=file['did_name'], meta=bittorrent_meta)
397
- self.logger(logging.INFO, f"Added bittorrent metadata to file DID {file['did_scope']}:{file['did_name']}")
647
+ self.logger(logging.INFO, f"Added BitTorrent metadata to file DID {file['did_scope']}:{file['did_name']}")
398
648
 
399
649
  def _register_file(
400
650
  self,
@@ -404,17 +654,33 @@ class UploadClient:
404
654
  activity: Optional[str] = None
405
655
  ) -> None:
406
656
  """
407
- Registers the given file in Rucio. Creates a dataset if
408
- needed. Registers the file DID and creates the replication
409
- rule if needed. Adds a replica to the file did.
410
- (This function is meant to be used as class internal only)
411
-
412
- :param file: dictionary describing the file
413
- :param registered_dataset_dids: set of dataset dids that were already registered
414
- :param ignore_availability: ignore the availability of a RSE
415
- :param activity: the activity set to the rule if no dataset is specified
416
-
417
- :raises DataIdentifierAlreadyExists: if file DID is already registered and the checksums do not match
657
+ Register a single file DID in Rucio, optionally creating its parent dataset if needed.
658
+
659
+ Ensures that a file is known in the Rucio catalog under the specified scope. If a
660
+ dataset is specified in `file` and it does not yet exist, the method creates it and
661
+ attaches the file to that dataset, applying replication rules as appropriate. If no
662
+ dataset is provided and the file DID does not yet exist in Rucio, the method creates
663
+ a replication rule for the newly added file. If the file DID already exists, no new
664
+ top-level rule is created (the file’s existing rules or attachments remain unchanged).
665
+ Checksums are compared to prevent conflicts if the file is already registered.
666
+
667
+ Parameters
668
+ ----------
669
+ file
670
+ A dictionary containing file information (e.g., 'did_scope', 'did_name', 'adler32', etc.).
671
+ registered_dataset_dids
672
+ A set of dataset DIDs already registered to avoid duplicates.
673
+ ignore_availability
674
+ If True, creates replication rules even when the RSE is marked unavailable.
675
+ activity
676
+ Specifies the transfer activity (e.g., 'User Subscriptions') for the replication rule.
677
+
678
+ Raises
679
+ ------
680
+ InputValidationError
681
+ If a dataset already exists, but the caller attempts to set a new lifetime for it.
682
+ DataIdentifierAlreadyExists
683
+ If the local checksum differs from the remote checksum.
418
684
  """
419
685
  logger = self.logger
420
686
  logger(logging.DEBUG, 'Registering file')
@@ -426,7 +692,8 @@ class UploadClient:
426
692
  except ScopeNotFound:
427
693
  pass
428
694
  if account_scopes and file['did_scope'] not in account_scopes:
429
- logger(logging.WARNING, 'Scope {} not found for the account {}.'.format(file['did_scope'], self.client.account))
695
+ logger(logging.WARNING,
696
+ 'Scope {} not found for the account {}.'.format(file['did_scope'], self.client.account))
430
697
 
431
698
  rse = file['rse']
432
699
  dataset_did_str = file.get('dataset_did_str')
@@ -447,8 +714,8 @@ class UploadClient:
447
714
  except DataIdentifierAlreadyExists:
448
715
  logger(logging.INFO, 'Dataset %s already exists - no rule will be created' % dataset_did_str)
449
716
  if file.get('lifetime') is not None:
450
- raise InputValidationError('Dataset %s exists and lifetime %s given. Prohibited to modify parent dataset lifetime.' % (dataset_did_str,
451
- file.get('lifetime')))
717
+ raise InputValidationError(
718
+ 'Dataset %s exists and lifetime %s given. Prohibited to modify parent dataset lifetime.' % (dataset_did_str, file.get('lifetime')))
452
719
  else:
453
720
  logger(logging.DEBUG, 'Skipping dataset registration')
454
721
 
@@ -457,16 +724,17 @@ class UploadClient:
457
724
  file_did = {'scope': file_scope, 'name': file_name}
458
725
  replica_for_api = self._convert_file_for_api(file)
459
726
  try:
460
- # if the remote checksum is different this did must not be used
727
+ # if the remote checksum is different, this DID must not be used
461
728
  meta = self.client.get_metadata(file_scope, file_name)
462
729
  logger(logging.INFO, 'File DID already exists')
463
730
  logger(logging.DEBUG, 'local checksum: %s, remote checksum: %s' % (file['adler32'], meta['adler32']))
464
731
 
465
732
  if str(meta['adler32']).lstrip('0') != str(file['adler32']).lstrip('0'):
466
- logger(logging.ERROR, 'Local checksum %s does not match remote checksum %s' % (file['adler32'], meta['adler32']))
733
+ logger(logging.ERROR,
734
+ 'Local checksum %s does not match remote checksum %s' % (file['adler32'], meta['adler32']))
467
735
  raise DataIdentifierAlreadyExists
468
736
 
469
- # add file to rse if it is not registered yet
737
+ # add the file to rse if it is not registered yet
470
738
  replicastate = list(self.client.list_replicas([file_did], all_states=True))
471
739
  if rse not in replicastate[0]['rses']:
472
740
  self.client.add_replicas(rse=rse, files=[replica_for_api])
@@ -479,17 +747,51 @@ class UploadClient:
479
747
  logger(logging.INFO, 'Successfully added replica in Rucio catalogue at %s' % rse)
480
748
  if not dataset_did_str:
481
749
  # only need to add rules for files if no dataset is given
482
- self.client.add_replication_rule([file_did], copies=1, rse_expression=rse, lifetime=file.get('lifetime'), ignore_availability=ignore_availability, activity=activity)
750
+ self.client.add_replication_rule([file_did],
751
+ copies=1,
752
+ rse_expression=rse,
753
+ lifetime=file.get('lifetime'),
754
+ ignore_availability=ignore_availability,
755
+ activity=activity)
483
756
  logger(logging.INFO, 'Successfully added replication rule at %s' % rse)
484
757
 
485
- def _get_file_guid(self, file: "Mapping[str, Any]") -> str:
758
+ def _get_file_guid(
759
+ self,
760
+ file: "Mapping[str, Any]"
761
+ ) -> str:
486
762
  """
487
- Get the guid of a file, trying different strategies
488
- (This function is meant to be used as class internal only)
763
+ Returns the unique identifier (GUID) for the given file.
764
+
765
+ If no GUID exists and the filename suggests a ROOT file, it extracts it with
766
+ `pool_extractFileIdentifier`. If a GUID exists, it is returned without dashes.
767
+ Otherwise, a new GUID is generated.
768
+
769
+ Parameters
770
+ ----------
771
+ file
772
+ A dictionary describing the file, expected to include:
773
+
774
+ * **`basename`**:
775
+ The base filename.
489
776
 
490
- :param file: dictionary describing the file
777
+ * **`path`**:
778
+ The path to the file.
491
779
 
492
- :returns: the guid
780
+ * **`guid`** (optional):
781
+ A pre-assigned GUID string.
782
+
783
+ * **`no_register`** (optional):
784
+ If True, skip attempts to derive a GUID for ROOT files.
785
+
786
+ Returns
787
+ -------
788
+ str
789
+ A string containing the file's GUID, stripped of dashes and in lowercase.
790
+
791
+ Raises
792
+ ------
793
+ RucioException
794
+ If GUID extraction using the `pool_extractFileIdentifier` command fails.
493
795
  """
494
796
  guid = file.get('guid')
495
797
  if not guid and 'pool.root' in file['basename'].lower() and not file.get('no_register'):
@@ -514,14 +816,24 @@ class UploadClient:
514
816
  item: "FileToUploadDict"
515
817
  ) -> "FileToUploadWithCollectedInfoDict":
516
818
  """
517
- Collects infos (e.g. size, checksums, etc.) about the file and
518
- returns them as a dictionary
519
- (This function is meant to be used as class internal only)
520
-
521
- :param filepath: path where the file is stored
522
- :param item: input options for the given file
523
-
524
- :returns: a dictionary containing all collected info and the input options
819
+ Collects and returns essential file descriptors (e.g., size, checksums, GUID, etc.).
820
+
821
+ This method computes the file's size, calculates its Adler-32 and MD5 checksums,
822
+ and retrieves the file's GUID. These values, along with other existing fields from
823
+ the input dictionary, are returned in a new dictionary.
824
+
825
+ Parameters
826
+ ----------
827
+ filepath
828
+ The local filesystem path to the file.
829
+ item
830
+ A dictionary containing initial upload parameters (e.g., RSE name, scope) for the
831
+ file. Some of its fields may be updated or augmented in the returned dictionary.
832
+
833
+ Returns
834
+ -------
835
+ "FileToUploadWithCollectedInfoDict"
836
+ A new dictionary enriched with relevant file descriptors.
525
837
  """
526
838
  new_item = copy.deepcopy(item)
527
839
  new_item = cast("FileToUploadWithCollectedInfoDict", new_item)
@@ -541,17 +853,51 @@ class UploadClient:
541
853
 
542
854
  return new_item
543
855
 
544
- def _collect_and_validate_file_info(self, items: "Iterable[FileToUploadDict]") -> list["FileToUploadWithCollectedInfoDict"]:
856
+ def _collect_and_validate_file_info(
857
+ self,
858
+ items: "Iterable[FileToUploadDict]"
859
+ ) -> list["FileToUploadWithCollectedInfoDict"]:
545
860
  """
546
- Checks if there are any inconsistencies within the given input
547
- options and stores the output of _collect_file_info for every file
548
- (This function is meant to be used as class internal only)
549
-
550
- :param filepath: list of dictionaries with all input files and options
551
-
552
- :returns: a list of dictionaries containing all descriptions of the files to upload
553
-
554
- :raises InputValidationError: if an input option has a wrong format
861
+ Collect and verify local file info for upload, optionally registering folders as
862
+ datasets/containers.
863
+
864
+ This method iterates over the provided items, each describing a local path and
865
+ associated upload parameters, checks that each item has a valid path and RSE, and
866
+ computes basic file details such as size and checksums. If the item is a directory
867
+ and `recursive` is set, the method calls `_recursive` to traverse subdirectories,
868
+ creating or attaching them as Rucio datasets or containers.
869
+
870
+ Parameters
871
+ ----------
872
+ items
873
+ An iterable of dictionaries describing files or directories, where each dictionary
874
+ typically has:
875
+
876
+ * **`path`**:
877
+ Local file system path
878
+
879
+ * **`rse`**:
880
+ Name of the RSE destination
881
+
882
+ * **`pfn`** (optional):
883
+ Physical file name (PFN)
884
+
885
+ * **`impl`** (optional):
886
+ Protocol implementation
887
+
888
+ * **`recursive`** (optional):
889
+ Whether to traverse directories recursively
890
+
891
+ Returns
892
+ -------
893
+ list["FileToUploadWithCollectedInfoDict"]
894
+ A list of dictionaries enriched with file descriptors (size, checksums, etc.)
895
+ and ready for further upload processing.
896
+
897
+ Raises
898
+ ------
899
+ InputValidationError
900
+ If no valid files are found.
555
901
  """
556
902
  logger = self.logger
557
903
  files: list["FileToUploadWithCollectedInfoDict"] = []
@@ -583,7 +929,8 @@ class UploadClient:
583
929
  if not len(fnames) and not len(subdirs):
584
930
  logger(logging.WARNING, 'Skipping %s because it is empty.' % dname)
585
931
  elif not len(fnames):
586
- logger(logging.WARNING, 'Skipping %s because it has no files in it. Subdirectories are not supported.' % dname)
932
+ logger(logging.WARNING,
933
+ 'Skipping %s because it has no files in it. Subdirectories are not supported.' % dname)
587
934
  elif os.path.isdir(path) and recursive:
588
935
  files.extend(cast("list[FileToUploadWithCollectedInfoDict]", self._recursive(item)))
589
936
  elif os.path.isfile(path) and not recursive:
@@ -599,15 +946,27 @@ class UploadClient:
599
946
 
600
947
  return files
601
948
 
602
- def _convert_file_for_api(self, file: "Mapping[str, Any]") -> dict[str, Any]:
949
+ def _convert_file_for_api(
950
+ self,
951
+ file: "Mapping[str, Any]"
952
+ ) -> dict[str, Any]:
603
953
  """
604
- Creates a new dictionary that contains only the values
605
- that are needed for the upload with the correct keys
606
- (This function is meant to be used as class internal only)
607
-
608
- :param file: dictionary describing a file to upload
609
-
610
- :returns: dictionary containing not more then the needed values for the upload
954
+ Create a minimal dictionary of file attributes for the Rucio API.
955
+
956
+ This method extracts only the necessary fields from the provided file dictionary,
957
+ producing a new dictionary that is suitable for registering or updating
958
+ a file replica in Rucio.
959
+
960
+ Parameters
961
+ ----------
962
+ file
963
+ A dictionary describing a file, expected to include at least `did_scope`,
964
+ `did_name`, `bytes`, `adler32`, `md5`, `meta`, `state`, and optionally `pfn`.
965
+
966
+ Returns
967
+ -------
968
+ dict[str, Any]
969
+ A dictionary containing only the relevant file attributes for Rucio's REST API.
611
970
  """
612
971
  replica = {}
613
972
  replica['scope'] = file['did_scope']
@@ -637,27 +996,67 @@ class UploadClient:
637
996
  sign_service: Optional[str] = None
638
997
  ) -> Optional[str]:
639
998
  """
640
- Uploads a file to the connected storage.
641
-
642
- :param rse_settings: dictionary containing the RSE settings
643
- :param rse_attributes: dictionary containing the RSE attribute key value pairs
644
- :param lfn: a single dict containing 'scope' and 'name'.
645
- Example:
646
- {'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'}
647
- If the 'filename' key is present, it will be used by Rucio as the actual name of the file on disk (separate from the Rucio 'name').
648
- :param source_dir: path to the local directory including the source files
649
- :param force_pfn: use the given PFN -- can lead to dark data, use sparingly
650
- :param force_scheme: use the given protocol scheme, overriding the protocol priority in the RSE description
651
- :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it
652
- :param sign_service: use the given service (e.g. gcs, s3, swift) to sign the URL
653
-
654
- :raises RucioException(msg): general exception with msg for more details.
999
+ Perform the actual file transfer to an RSE using the appropriate protocol.
1000
+
1001
+ This method is used once all necessary file information is resolved (logical file
1002
+ name, checksums, etc.). It creates and verifies the physical file name (PFN),
1003
+ optionally removes or overwrites stale replicas, uploads the file (potentially via
1004
+ a temporary PFN suffix), checks its size/checksum consistency, and finalizes it
1005
+ under the expected PFN.
1006
+
1007
+ Parameters
1008
+ ----------
1009
+ rse_settings
1010
+ Dictionary containing the RSE configuration.
1011
+ rse_attributes
1012
+ Additional attributes of the RSE (e.g. 'archive_timeout').
1013
+ lfn
1014
+ An optional dictionary describing the logical file (e.g., {'name': '1_rse_local_put.raw',
1015
+ 'scope': 'user.jdoe', ..}). If the 'filename' key is present, it overrides 'name'
1016
+ in determining the local file name to read from source_dir.
1017
+ source_dir
1018
+ Local source directory path where the file to be uploaded resides.
1019
+ domain
1020
+ Network domain for the upload, commonly 'wan' for wide-area networks.
1021
+ impl
1022
+ Name of the protocol implementation to be enforced (if any).
1023
+ force_pfn
1024
+ If provided, forces the use of this PFN for the file location on the storage
1025
+ (use with care since it can lead to "dark" data).
1026
+ force_scheme
1027
+ If provided, forces the protocol scheme (e.g. 'davs', 'https') to be used.
1028
+ transfer_timeout
1029
+ Timeout (in seconds) for the transfer operation before it fails.
1030
+ delete_existing
1031
+ If True, removes any unregistered or stale file on the storage that matches this PFN.
1032
+ sign_service
1033
+ If set, requests a signed URL from the given service (e.g., gcs, s3, swift).
1034
+
1035
+ Returns
1036
+ -------
1037
+ Optional[str]
1038
+ The final PFN (physical file name) of the successfully uploaded file, or None
1039
+ if creation failed.
1040
+
1041
+ Raises
1042
+ ------
1043
+ FileReplicaAlreadyExists
1044
+ If the target file already exists and overwrite is not allowed.
1045
+ RSEOperationNotSupported
1046
+ If storage-side operations (delete/rename/put) are not supported or fail.
1047
+ RucioException
1048
+ If renaming or other critical operations cannot be completed.
655
1049
  """
1050
+
656
1051
  logger = self.logger
657
1052
 
658
1053
  # Construct protocol for write operation.
659
1054
  # IMPORTANT: All upload stat() checks are always done with the write_protocol EXCEPT for cloud resources (signed URL for write cannot be used for read)
660
- protocol_write = self._create_protocol(rse_settings, 'write', force_scheme=force_scheme, domain=domain, impl=impl)
1055
+ protocol_write = self._create_protocol(rse_settings,
1056
+ 'write',
1057
+ force_scheme=force_scheme,
1058
+ domain=domain,
1059
+ impl=impl)
661
1060
 
662
1061
  base_name = lfn.get('filename', lfn['name'])
663
1062
  name = lfn.get('name', base_name)
@@ -682,44 +1081,58 @@ class UploadClient:
682
1081
 
683
1082
  # Auth. mostly for object stores
684
1083
  if sign_service:
685
- protocol_read = self._create_protocol(rse_settings, 'read', domain=domain, impl=impl)
1084
+ protocol_read = self._create_protocol(rse_settings,
1085
+ 'read',
1086
+ domain=domain,
1087
+ impl=impl)
686
1088
  if pfn is not None:
687
1089
  signed_read_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'read', pfn)
688
1090
  pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'write', pfn)
689
1091
 
690
- # Create a name of tmp file if renaming operation is supported
1092
+ # Create a name of tmp file if the renaming operation is supported
691
1093
  pfn_tmp = cast("str", '%s.rucio.upload' % pfn if protocol_write.renaming else pfn)
692
1094
  signed_read_pfn_tmp = '%s.rucio.upload' % signed_read_pfn if protocol_write.renaming else signed_read_pfn
693
1095
 
694
1096
  # Either DID exists or not register_after_upload
695
1097
  if protocol_write.overwrite is False and delete_existing is False:
696
1098
  if sign_service:
697
- # Construct protocol for read ONLY for cloud resources and get signed URL for GET
1099
+ # Construct protocol for read-ONLY for cloud resources and get signed URL for GET
698
1100
  if protocol_read.exists(signed_read_pfn):
699
- raise FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn)) # wrong exception ?
1101
+ raise FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn)) # wrong exception?
700
1102
  elif protocol_write.exists(pfn):
701
- raise FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn)) # wrong exception ?
1103
+ raise FileReplicaAlreadyExists(
1104
+ 'File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn)) # wrong exception?
702
1105
 
703
1106
  # Removing tmp from earlier attempts
704
- if (not sign_service and protocol_write.exists(pfn_tmp)) or (sign_service and protocol_read.exists(signed_read_pfn_tmp)):
1107
+ if (not sign_service and protocol_write.exists(pfn_tmp)) or (
1108
+ sign_service and protocol_read.exists(signed_read_pfn_tmp)):
705
1109
  logger(logging.DEBUG, 'Removing remains of previous upload attempts.')
706
1110
  try:
707
1111
  # Construct protocol for delete operation.
708
- protocol_delete = self._create_protocol(rse_settings, 'delete', force_scheme=force_scheme, domain=domain, impl=impl)
1112
+ protocol_delete = self._create_protocol(rse_settings,
1113
+ 'delete',
1114
+ force_scheme=force_scheme,
1115
+ domain=domain,
1116
+ impl=impl)
709
1117
  delete_pfn = '%s.rucio.upload' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0]
710
1118
  if sign_service:
711
1119
  delete_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'delete', delete_pfn)
712
1120
  protocol_delete.delete(delete_pfn)
713
1121
  protocol_delete.close()
714
1122
  except Exception as error:
715
- raise RSEOperationNotSupported('Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(error)))
1123
+ raise RSEOperationNotSupported(
1124
+ 'Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(error)))
716
1125
 
717
1126
  # Removing not registered files from earlier attempts
718
1127
  if delete_existing:
719
1128
  logger(logging.DEBUG, 'Removing not-registered remains of previous upload attempts.')
720
1129
  try:
721
1130
  # Construct protocol for delete operation.
722
- protocol_delete = self._create_protocol(rse_settings, 'delete', force_scheme=force_scheme, domain=domain, impl=impl)
1131
+ protocol_delete = self._create_protocol(rse_settings,
1132
+ 'delete',
1133
+ force_scheme=force_scheme,
1134
+ domain=domain,
1135
+ impl=impl)
723
1136
  delete_pfn = '%s' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0]
724
1137
  if sign_service:
725
1138
  delete_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'delete', delete_pfn)
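
A small aside on the temporary-PFN convention visible above (`pfn_tmp`): when the write protocol supports renaming, the data is first put at `<pfn>.rucio.upload` and only renamed to the final PFN after the size and checksum checks. The PFN in this sketch is hypothetical:

```python
# Naming convention for the temporary upload target when renaming is supported.
pfn = "davs://storage.example.org:443/rucio/user/alice/file1.txt"  # hypothetical PFN
pfn_tmp = "%s.rucio.upload" % pfn
print(pfn_tmp)  # ...file1.txt.rucio.upload, renamed to the final PFN once verified
```
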
@@ -730,7 +1143,14 @@ class UploadClient:
730
1143
 
731
1144
  # Process the upload of the tmp file
732
1145
  try:
733
- retry(protocol_write.put, base_name, pfn_tmp, source_dir, transfer_timeout=transfer_timeout)(mtries=2, logger=logger)
1146
+ retry(protocol_write.put,
1147
+ base_name,
1148
+ pfn_tmp,
1149
+ source_dir,
1150
+ transfer_timeout=transfer_timeout)(
1151
+ mtries=2,
1152
+ logger=logger
1153
+ )
734
1154
  logger(logging.INFO, 'Successful upload of temporary file. {}'.format(pfn_tmp))
735
1155
  except Exception as error:
736
1156
  raise RSEOperationNotSupported(str(error))
@@ -750,17 +1170,20 @@ class UploadClient:
750
1170
  if ('filesize' in stats) and ('filesize' in lfn):
751
1171
  self.logger(logging.DEBUG, 'Filesize: Expected=%s Found=%s' % (lfn['filesize'], stats['filesize']))
752
1172
  if int(stats['filesize']) != int(lfn['filesize']):
753
- raise RucioException('Filesize mismatch. Source: %s Destination: %s' % (lfn['filesize'], stats['filesize']))
1173
+ raise RucioException(
1174
+ 'Filesize mismatch. Source: %s Destination: %s' % (lfn['filesize'], stats['filesize']))
754
1175
  if rse_settings['verify_checksum'] is not False:
755
1176
  if ('adler32' in stats) and ('adler32' in lfn):
756
- self.logger(logging.DEBUG, 'Checksum: Expected=%s Found=%s' % (lfn['adler32'], stats['adler32']))
1177
+ self.logger(logging.DEBUG,
1178
+ 'Checksum: Expected=%s Found=%s' % (lfn['adler32'], stats['adler32']))
757
1179
  if str(stats['adler32']).lstrip('0') != str(lfn['adler32']).lstrip('0'):
758
- raise RucioException('Checksum mismatch. Source: %s Destination: %s' % (lfn['adler32'], stats['adler32']))
1180
+ raise RucioException(
1181
+ 'Checksum mismatch. Source: %s Destination: %s' % (lfn['adler32'], stats['adler32']))
759
1182
 
760
1183
  except Exception as error:
761
1184
  raise error
762
1185
 
763
- # The upload finished successful and the file can be renamed
1186
+ # The upload finished successfully and the file can be renamed
764
1187
  try:
765
1188
  if protocol_write.renaming:
766
1189
  logger(logging.DEBUG, 'Renaming file %s to %s' % (pfn_tmp, pfn))
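
A short note on the checksum comparison above: both adler32 strings are normalised with `lstrip('0')` before being compared, so values that differ only in leading zeros are treated as equal. The checksum strings below are illustrative:

```python
# Leading zeros are stripped on both sides, mirroring the check shown above.
local, remote = "0a1b2c3d", "a1b2c3d"   # illustrative adler32 values
assert str(local).lstrip("0") == str(remote).lstrip("0")
```
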
@@ -778,9 +1201,31 @@ class UploadClient:
778
1201
  pfn: str
779
1202
  ) -> dict[str, Any]:
780
1203
  """
781
- Try to stat file, on fail try again 1s, 2s, 4s, 8s, 16s, 32s later. Fail is all fail
782
- :param protocol: The protocol to use to reach this file
783
- :param pfn: Physical file name of the target for the protocol stat
1204
+ Attempt to retrieve file statistics with exponential backoff.
1205
+
1206
+ This method invokes `protocol.stat` a limited number of times, waiting with an
1207
+ exponential backoff between each attempt when an error occurs. After the configured
1208
+ number of retries, the method performs one final `stat` call and returns its result
1209
+ or lets any resulting exception propagate.
1210
+
1211
+ Parameters
1212
+ ----------
1213
+ protocol
1214
+ The RSEProtocol instance to use for retrieving file statistics
1215
+ pfn
1216
+ The physical file name (PFN) to be checked.
1217
+
1218
+ Returns
1219
+ -------
1220
+ dict[str, Any]
1221
+ A dictionary expected to include the filesize and adler32 for the provided pfn.
1222
+
1223
+ Raises
1224
+ ------
1225
+ RSEChecksumUnavailable
1226
+ If the protocol indicates a missing checksum for the file.
1227
+ Exception
1228
+ If the requested service is not available or permissions are not granted.
784
1229
  """
785
1230
  retries = config_get_int('client', 'protocol_stat_retries', raise_exception=False, default=6)
786
1231
  for attempt in range(retries):
@@ -800,8 +1245,8 @@ class UploadClient:
800
1245
  fail_str = ['The requested service is not available at the moment', 'Permission refused']
801
1246
  if any(x in str(error) for x in fail_str):
802
1247
  raise error
803
- self.logger(logging.DEBUG, 'stat: unknown edge case, retrying in %ss' % 2**attempt)
804
- time.sleep(2**attempt)
1248
+ self.logger(logging.DEBUG, 'stat: unknown edge case, retrying in %ss' % 2 ** attempt)
1249
+ time.sleep(2 ** attempt)
805
1250
  return protocol.stat(pfn)
806
1251
 
807
1252
  def _create_protocol(
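
For reference, the stat retry schedule described above is a power-of-two backoff; with the default of six attempts read from the `[client] protocol_stat_retries` option (visible in the code above), the waits work out as follows:

```python
# Backoff used by _retry_protocol_stat: sleep 2**attempt seconds after each
# failed stat(), then issue one final stat() call that may raise.
retries = 6  # default of config_get_int('client', 'protocol_stat_retries', ...)
waits = [2 ** attempt for attempt in range(retries)]
print(waits)  # [1, 2, 4, 8, 16, 32]
```
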
@@ -813,14 +1258,44 @@ class UploadClient:
813
1258
  domain: str = 'wan'
814
1259
  ) -> "RSEProtocol":
815
1260
  """
816
- Protocol construction.
817
- :param rse_settings: rse_settings
818
- :param operation: activity, e.g. read, write, delete etc.
819
- :param force_scheme: custom scheme
820
- :param auth_token: Optionally passing JSON Web Token (OIDC) string for authentication
1261
+ Creates and returns the protocol object for the requested RSE operation.
1262
+
1263
+ Establishes a connection using the specified parameters (scheme, domain, etc.)
1264
+ and returns a protocol instance capable of handling the requested operation.
1265
+
1266
+ Parameters
1267
+ ----------
1268
+ rse_settings
1269
+ The dictionary containing RSE configuration.
1270
+ operation
1271
+ The intended operation, such as 'read', 'write', or 'delete'.
1272
+ impl
1273
+ An optional override for the default protocol implementation.
1274
+ force_scheme
1275
+ If provided, forces the protocol to use this scheme.
1276
+ domain
1277
+ The network domain to be used, defaulting to 'wan'.
1278
+
1279
+ Returns
1280
+ -------
1281
+ "RSEProtocol"
1282
+ The instantiated `RSEProtocol` object.
1283
+
1284
+ Raises
1285
+ ------
1286
+ Exception
1287
+ If the protocol creation or connection attempt fails.
821
1288
  """
822
1289
  try:
823
- protocol = rsemgr.create_protocol(rse_settings, operation, scheme=force_scheme, domain=domain, impl=impl, auth_token=self.auth_token, logger=self.logger)
1290
+ protocol = rsemgr.create_protocol(
1291
+ rse_settings,
1292
+ operation,
1293
+ scheme=force_scheme,
1294
+ domain=domain,
1295
+ impl=impl,
1296
+ auth_token=self.auth_token,
1297
+ logger=self.logger
1298
+ )
824
1299
  protocol.connect()
825
1300
  except Exception as error:
826
1301
  self.logger(logging.WARNING, 'Failed to create protocol for operation: %s' % operation)
@@ -828,24 +1303,69 @@ class UploadClient:
828
1303
  raise error
829
1304
  return protocol
830
1305
 
831
- def _send_trace(self, trace: "TraceDict") -> None:
1306
+ def _send_trace(
1307
+ self,
1308
+ trace: "TraceDict"
1309
+ ) -> None:
832
1310
  """
833
- Checks if sending trace is allowed and send the trace.
1311
+ Sends the trace if tracing is enabled.
834
1312
 
835
- :param trace: the trace
1313
+ If `self.tracing` is True, this method uses Rucio's `send_trace` function to
1314
+ dispatch the provided trace object to the Rucio host. Otherwise, it takes no action.
1315
+
1316
+ Parameters
1317
+ ----------
1318
+ trace
1319
+ The trace object to be sent.
836
1320
  """
837
1321
  if self.tracing:
838
1322
  send_trace(trace, self.client.trace_host, self.client.user_agent)
839
1323
 
840
- def _recursive(self, item: "FileToUploadDict") -> list["FileToUploadWithCollectedAndDatasetInfoDict"]:
1324
+ def _recursive(
1325
+ self,
1326
+ item: "FileToUploadDict"
1327
+ ) -> list["FileToUploadWithCollectedAndDatasetInfoDict"]:
841
1328
  """
842
- If the --recursive flag is set, it replicates the folder structure recursively into collections
843
- A folder only can have either other folders inside or files, but not both of them
844
- - If it has folders, the root folder will be a container
845
- - If it has files, the root folder will be a dataset
846
- - If it is empty, it does not create anything
847
-
848
- :param item: dictionary containing all descriptions of the files to upload
1329
+ Recursively inspects a folder and creates corresponding Rucio datasets or containers.
1330
+
1331
+ This method traverses the local path specified in the given dictionary `item` and
1332
+ interprets subfolders as either Rucio containers (if they themselves contain further
1333
+ subfolders) or datasets (if they only contain files). Files within these datasets
1334
+ are gathered into a list with additional upload information. The method also attempts
1335
+ to create and attach these datasets/containers in Rucio, replicating the folder
1336
+ structure.
1337
+
1338
+ Note:
1339
+ ------
1340
+ Currently, this method does not allow the top-level directory to contain both files
1341
+ and subdirectories.
1342
+
1343
+ Parameters
1344
+ ----------
1345
+ item
1346
+ A dictionary describing the local path and upload parameters.
1347
+ It must contain at least:
1348
+
1349
+ * **`rse`**:
1350
+ The target RSE for the upload.
1351
+
1352
+ * **`path`**:
1353
+ The local directory path to inspect.
1354
+
1355
+ * **`did_scope`** (optional):
1356
+ Custom scope for the resulting datasets/containers.
1357
+
1358
+ Returns
1359
+ -------
1360
+ list["FileToUploadWithCollectedAndDatasetInfoDict"]
1361
+ A list of file descriptors enriched with collected file information, each
1362
+ conforming to FileToUploadWithCollectedAndDatasetInfoDict.
1363
+
1364
+ Raises
1365
+ ------
1366
+ InputValidationError
1367
+ If a folder contains both files and subdirectories at its top level (invalid
1368
+ container/dataset structure).
849
1369
  """
850
1370
  files: list["FileToUploadWithCollectedAndDatasetInfoDict"] = []
851
1371
  datasets: list["DatasetDict"] = []
@@ -878,7 +1398,8 @@ class UploadClient:
878
1398
  elif len(dirs) > 0:
879
1399
  containers.append({'scope': scope, 'name': root.split('/')[-1]})
880
1400
  self.logger(logging.DEBUG, 'Appended container with DID %s:%s' % (scope, path))
881
- attach.extend([{'scope': scope, 'name': root.split('/')[-1], 'rse': rse, 'did': {'scope': scope, 'name': dir_}} for dir_ in dirs])
1401
+ attach.extend([{'scope': scope, 'name': root.split('/')[-1], 'rse': rse,
1402
+ 'did': {'scope': scope, 'name': dir_}} for dir_ in dirs])
882
1403
  elif len(dirs) == 0 and len(fnames) == 0:
883
1404
  self.logger(logging.WARNING, 'The folder %s is empty, skipping' % root)
884
1405
  continue
@@ -904,7 +1425,8 @@ class UploadClient:
904
1425
  self.logger(logging.INFO, 'DIDs attached to collection %s:%s' % (att['scope'], att['name']))
905
1426
  except RucioException as error:
906
1427
  self.logger(logging.ERROR, error)
907
- self.logger(logging.ERROR, 'It was not possible to attach to collection with DID %s:%s' % (att['scope'], att['name']))
1428
+ self.logger(logging.ERROR,
1429
+ 'It was not possible to attach to collection with DID %s:%s' % (att['scope'], att['name']))
908
1430
  return files
909
1431
 
910
1432
  def preferred_impl(
@@ -913,13 +1435,29 @@ class UploadClient:
913
1435
  domain: str
914
1436
  ) -> Optional[str]:
915
1437
  """
916
- Finds the optimum protocol impl preferred by the client and
917
- supported by the remote RSE.
918
-
919
- :param rse_settings: dictionary containing the RSE settings
920
- :param domain: The network domain, either 'wan' (default) or 'lan'
921
-
922
- :raises RucioException(msg): general exception with msg for more details.
1438
+ Select a suitable protocol implementation for read, write, and delete operations on
1439
+ the given RSE and domain.
1440
+
1441
+ This method checks the local client configuration (under the `[upload] preferred_impl`
1442
+ setting) and compares it against the list of protocols declared in `rse_settings`.
1443
+ It attempts to find a protocol that supports the required I/O operations (read,
1444
+ write, delete) in the specified domain. If multiple preferred protocols are listed
1445
+ in the config, it iterates in order and returns the first viable match.
1446
+
1447
+ Parameters
1448
+ ----------
1449
+ rse_settings
1450
+ A dictionary describing RSE details, including available protocols and their
1451
+ domains.
1452
+ domain
1453
+ The network domain (e.g., 'lan' or 'wan') in which the protocol must support
1454
+ all operations.
1455
+
1456
+ Returns
1457
+ -------
1458
+ Optional[str]
1459
+ The name of a protocol implementation that can handle read/write/delete
1460
+ for the specified domain, or None if no suitable protocol was found.
923
1461
  """
924
1462
  preferred_protocols = []
925
1463
  supported_impl = None
@@ -941,28 +1479,34 @@ class UploadClient:
941
1479
  preferred_impls[i] = 'rucio.rse.protocols.' + impl
942
1480
  i += 1
943
1481
 
944
- preferred_protocols = [protocol for protocol in reversed(rse_settings['protocols']) if protocol['impl'] in preferred_impls]
1482
+ preferred_protocols = [protocol for protocol in reversed(rse_settings['protocols']) if
1483
+ protocol['impl'] in preferred_impls]
945
1484
 
946
1485
  if len(preferred_protocols) > 0:
947
- preferred_protocols += [protocol for protocol in reversed(rse_settings['protocols']) if protocol not in preferred_protocols]
1486
+ preferred_protocols += [protocol for protocol in reversed(rse_settings['protocols']) if
1487
+ protocol not in preferred_protocols]
948
1488
  else:
949
1489
  preferred_protocols = reversed(rse_settings['protocols'])
950
1490
 
951
1491
  for protocol in preferred_protocols:
952
1492
  if domain not in list(protocol['domains'].keys()):
953
- self.logger(logging.DEBUG, 'Unsuitable protocol "%s": Domain %s not supported' % (protocol['impl'], domain))
1493
+ self.logger(logging.DEBUG,
1494
+ 'Unsuitable protocol "%s": Domain %s not supported' % (protocol['impl'], domain))
954
1495
  continue
955
1496
  if not all(operations in protocol['domains'][domain] for operations in ("read", "write", "delete")):
956
- self.logger(logging.DEBUG, 'Unsuitable protocol "%s": All operations are not supported' % (protocol['impl']))
1497
+ self.logger(logging.DEBUG,
1498
+ 'Unsuitable protocol "%s": All operations are not supported' % (protocol['impl']))
957
1499
  continue
958
1500
  try:
959
- supported_protocol = rsemgr.create_protocol(rse_settings, 'write', domain=domain, impl=protocol['impl'], auth_token=self.auth_token, logger=self.logger)
1501
+ supported_protocol = rsemgr.create_protocol(rse_settings, 'write', domain=domain, impl=protocol['impl'],
1502
+ auth_token=self.auth_token, logger=self.logger)
960
1503
  supported_protocol.connect()
961
1504
  except Exception as error:
962
1505
  self.logger(logging.DEBUG, 'Failed to create protocol "%s", exception: %s' % (protocol['impl'], error))
963
1506
  pass
964
1507
  else:
965
- self.logger(logging.INFO, 'Preferred protocol impl supported locally and remotely: %s' % (protocol['impl']))
1508
+ self.logger(logging.INFO,
1509
+ 'Preferred protocol impl supported locally and remotely: %s' % (protocol['impl']))
966
1510
  supported_impl = protocol['impl']
967
1511
  break
968
1512