rucio-clients 37.5.0__py3-none-any.whl → 37.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rucio-clients might be problematic.
- rucio/cli/bin_legacy/rucio.py +1 -1
- rucio/cli/bin_legacy/rucio_admin.py +1 -1
- rucio/cli/did.py +2 -2
- rucio/cli/rse.py +2 -3
- rucio/cli/subscription.py +1 -1
- rucio/client/baseclient.py +5 -1
- rucio/client/didclient.py +16 -16
- rucio/client/downloadclient.py +14 -14
- rucio/client/lockclient.py +3 -3
- rucio/client/replicaclient.py +2 -2
- rucio/client/touchclient.py +1 -1
- rucio/client/uploadclient.py +725 -181
- rucio/common/config.py +1 -2
- rucio/common/constants.py +2 -0
- rucio/common/didtype.py +2 -2
- rucio/common/pcache.py +20 -25
- rucio/common/plugins.py +10 -17
- rucio/common/schema/__init__.py +7 -5
- rucio/common/utils.py +1 -1
- rucio/rse/protocols/ngarc.py +2 -2
- rucio/rse/protocols/srm.py +1 -1
- rucio/rse/protocols/webdav.py +8 -1
- rucio/vcsversion.py +3 -3
- {rucio_clients-37.5.0.dist-info → rucio_clients-37.6.0.dist-info}/METADATA +1 -1
- {rucio_clients-37.5.0.dist-info → rucio_clients-37.6.0.dist-info}/RECORD +36 -36
- {rucio_clients-37.5.0.data → rucio_clients-37.6.0.data}/data/etc/rse-accounts.cfg.template +0 -0
- {rucio_clients-37.5.0.data → rucio_clients-37.6.0.data}/data/etc/rucio.cfg.atlas.client.template +0 -0
- {rucio_clients-37.5.0.data → rucio_clients-37.6.0.data}/data/etc/rucio.cfg.template +0 -0
- {rucio_clients-37.5.0.data → rucio_clients-37.6.0.data}/data/requirements.client.txt +0 -0
- {rucio_clients-37.5.0.data → rucio_clients-37.6.0.data}/data/rucio_client/merge_rucio_configs.py +0 -0
- {rucio_clients-37.5.0.data → rucio_clients-37.6.0.data}/scripts/rucio +0 -0
- {rucio_clients-37.5.0.data → rucio_clients-37.6.0.data}/scripts/rucio-admin +0 -0
- {rucio_clients-37.5.0.dist-info → rucio_clients-37.6.0.dist-info}/WHEEL +0 -0
- {rucio_clients-37.5.0.dist-info → rucio_clients-37.6.0.dist-info}/licenses/AUTHORS.rst +0 -0
- {rucio_clients-37.5.0.dist-info → rucio_clients-37.6.0.dist-info}/licenses/LICENSE +0 -0
- {rucio_clients-37.5.0.dist-info → rucio_clients-37.6.0.dist-info}/top_level.txt +0 -0
rucio/client/uploadclient.py
CHANGED
@@ -56,18 +56,32 @@ if TYPE_CHECKING:


class UploadClient:
-
def __init__(
-
-
-
-
+ self,
+ _client: Optional[Client] = None,
+ logger: Optional["LoggerFunction"] = None,
+ tracing: bool = True
):
"""
-
-
-
-
+ Initialize the UploadClient with the necessary configuration to manage file uploads.
+
+ This method is used to create a new UploadClient instance that can upload files. It
+ allows the use of an existing Rucio Client, a custom logger, and tracing for debug
+ information during the upload process.
+
+ Parameters
+ ----------
+ _client
+ An existing Rucio `Client` instance to reuse. If not provided, a new one is created.
+ logger
+ A logger function. If not provided, the default Python logger is used.
+ tracing
+ Indicates whether to enable tracing to capture upload activity details.
+
+ Raises
+ ------
+ InputValidationError
+ If the client account is not found or is invalid, preventing upload setup.
"""
if not logger:
self.logger = logging.log
@@ -77,7 +91,9 @@ class UploadClient:
self.client: Final[Client] = _client if _client else Client()
self.client_location = detect_client_location()
# if token should be used, use only JWT tokens
- self.auth_token: Optional[str] =
+ self.auth_token: Optional[str] = (
+ self.client.auth_token if len(self.client.auth_token.split(".")) == 3 else None
+ )
self.tracing = tracing
if not self.tracing:
logger(logging.DEBUG, 'Tracing is turned off.')
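The token handling in the hunk above keeps `self.auth_token` only when the client's token has the shape of a JWT, i.e. three dot-separated segments. A minimal sketch of that shape check (the helper name is hypothetical, not part of the Rucio API):

```python
from typing import Optional

def jwt_or_none(token: Optional[str]) -> Optional[str]:
    # A JWT is header.payload.signature: exactly three dot-separated parts.
    # Anything else (for example a classic Rucio auth token) is dropped,
    # mirroring the check in UploadClient.__init__ above.
    if token and len(token.split(".")) == 3:
        return token
    return None

assert jwt_or_none("aaa.bbb.ccc") == "aaa.bbb.ccc"
assert jwt_or_none("legacy-token-1234") is None
```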
@@ -86,10 +102,14 @@ class UploadClient:
try:
acc = self.client.whoami()
if acc is None:
- raise InputValidationError(
+ raise InputValidationError(
+ 'Account not specified and rucio has no account with your identity'
+ )
self.client.account = acc['account']
except RucioException as e:
- raise InputValidationError(
+ raise InputValidationError(
+ f'Account not specified and problem with rucio: {e}'
+ )
self.logger(logging.DEBUG, 'Discovered account as "%s"' % self.client.account)
self.default_file_scope: Final[str] = 'user.' + self.client.account
self.rses = {}
@@ -112,34 +132,204 @@ class UploadClient:
activity: Optional[str] = None
) -> int:
"""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ Uploads one or more files to an RSE (Rucio Storage Element) and optionally registers them.
+
+ An overview of this method's performed actions:
+ 1. Collects and validates file info from the passed `items` (directories may be
+ also included), ensuring valid paths exist on the local filesystem. If an RSE
+ expression is provided, a single RSE is picked at random from it.
+
+ 2. Checks the RSE's availability for writing (unless `ignore_availability` is True).
+
+ 3. Optionally registers each file in the Rucio Catalog, handling the DID creation,
+ dataset creation/attachment, and replication rules as needed.
+
+ 4. Uploads the files using the underlying protocol handlers and verifies checksums
+ if desired/possible. Partial or failed uploads raise exceptions.
+
+ 5. (Optional) Produces a JSON summary file at `summary_file_path`, listing the final
+ PFNs, checksums, and other info for all successfully uploaded files.
+
+ Parameters
+ ----------
+ items
+ A sequence of dictionaries, each describing a file to upload (or a
+ directory to be scanned). For each item, the supported keys are:
+
+ * **`path`** (PathTypeAlias, required):
+ The local path to the file or directory. If this is a directory and
+ `recursive` is True, the directory (and its subdirectories) are traversed.
+
+ * **`rse`** (str, required):
+ The target RSE or an RSE expression where the upload should be placed. If
+ an expression is provided (e.g., "tier=1"), one RSE from that expression
+ is chosen randomly.
+
+ * **`did_scope`** (str, not required):
+ The Rucio scope in which to register the file DID. Defaults to `user.<account>`.
+
+ * **`did_name`** (str, not required):
+ The logical filename in Rucio. Defaults to the local basename if not provided.
+
+ * **`lifetime`** (int, not required):
+ The lifetime (in seconds) to apply when creating a new replication rule.
+ For file uploads without a dataset, a new rule with that lifetime is created
+ if the file DID does not already exist in Rucio. For a new dataset, the
+ dataset is created with a rule using this lifetime, but if the dataset
+ already exists and you specify a lifetime, an error is raised.
+
+ _**Note:**_ **`lifetime`** is not automatically applied to nested containers
+ or datasets in recursive mode.
+
+ * **`impl`** (str, not required):
+ Name of the protocol implementation to be used for uploading this item.
+ For example, `"rucio.rse.protocols.gfal.Default"`.
+
+ * **`pfn`** (str, not required):
+ Allows you to explicitly set the Physical File Name (PFN) for the upload,
+ determining exactly where the file is placed on the storage. However, for
+ deterministic RSEs, specifying a PFN causes the client to skip registering
+ the file under the usual deterministic scheme. For non-deterministic RSEs,
+ you can still force the file to be registered in the Rucio catalog after
+ being uploaded, using `no_register=False` along with `register_after_upload=True`
+ (or by manually handling the registration later).
+
+ * **`force_scheme`** (str, not required):
+ Enforces the use of a specific protocol scheme (e.g., davs, https) during
+ file uploads. If the selected protocol is not compatible, the upload will
+ stop and raise an error instead of falling back to any other scheme.
+
+ * **`transfer_timeout`** (int, not required):
+ A maximum duration (in seconds) to wait for each individual file transfer
+ to complete. If the file transfer does not finish before this timeout
+ elapses, the operation will be aborted and retried one last time. When
+ transfer_timeout is None, no specific timeout is enforced, and the transfer
+ may continue until it completes or fails for another reason.
+
+ * **`guid`** (str, not required):
+ If provided, Rucio will use this GUID. If not provided and the file is
+ “pool.root” with `no_register` unset, Rucio tries to extract the GUID via
+ `pool_extractFileIdentifier`, raising an error if that fails. Otherwise, a
+ random GUID will be generated.
+
+ * **`no_register`** (bool, not required, default=False):
+ If set to True, the file is not registered in the Rucio Catalog, i.e., there
+ is no DID creation, no replica entry, and no rules. This is appropriate if
+ you plan to register the replica or create rules separately.
+
+ _**Note:**_ If **`recursive`**=True, the method still creates datasets
+ and/or containers for the directories when needed.
+
+ * **`register_after_upload`** (bool, not required, default=False):
+ If set to True, the file is uploaded first, and only then is the DID created
+ or updated in the Catalog. This can be useful when you want the actual data
+ on storage before finalizing the registration. By default (False), the file
+ is registered in Rucio before the physical upload if `no_register` is False.
+
+ * **`recursive`** (bool, not required, default=False):
+ If set to `True`, the method treats the specified path as a directory and
+ (depending on the combination with other parameters) recursively traverses
+ its subdirectories, mapping them into container/dataset hierarchies. Single
+ top-level file paths are ignored, but individual files found in subdirectories
+ are processed. Empty directories or non-existent paths also produce a warning.
+ If `False`, then top-level file paths or the direct children-files of the
+ given top-level directory are only processed (subdirectories are ignored,
+ and no container structure is created).
+
+ * **`dataset_scope`** / **`dataset_name`** (str, not required):
+ To register uploaded files into a dataset DID, you need to specify both
+ dataset_name and dataset_scope. With no_register=False, the client ensures
+ {dataset_scope}:{dataset_name} exists (creating it with a replication rule
+ if it doesn't), or simply attaching new files if it does. If the dataset
+ already exists and you specify a new lifetime, or if a checksum mismatch
+ is detected, registration fails. In non-recursive mode, only files in the
+ top-level directory are attached to the dataset and subdirectories are
+ skipped with a warning. In recursive mode, the client aims to create
+ containers for directories containing only subdirectories and datasets for
+ directories containing only files (raising an error if the top-level folder
+ mixes files and directories). If the top-level directory has subdirectories,
+ the user-supplied dataset_name is effectively ignored at that level (each
+ subdirectory becomes its own dataset or container); if there are no
+ subdirectories, the entire folder is registered as a single dataset.
+
+ * **`dataset_meta`** (dict, not required):
+ Additional metadata (e.g., `{'project': 'myProject'}`) to attach to the
+ newly created dataset when: the dataset does not already exist, `recursive=False`,
+ `no_register=False` and both `dataset_scope` and `dataset_name` are provided.
+
+ _**Note:**_ If multiple files share the same `dataset_scope` and `dataset_name`,
+ then if a dataset is created, it considers only the first item’s dataset_meta.
+ summary_file_path
+ If specified, a JSON file is created with a summary of each successfully
+ uploaded file, including checksum, PFN, scope, and name entries.
+ traces_copy_out
+ A list reference for collecting the trace dictionaries that Rucio generates
+ while iterating over each file. A new trace dictionary is appended to this list
+ for each file considered (even those ultimately skipped or already on the RSE).
+ ignore_availability
+ If set to True, the RSE's "write availability" is not enforced. By default,
+ this is False, and an RSE marked as unavailable for writing will raise an error.
+ activity
+ If you are uploading files without a parent dataset, this string sets the “activity”
+ on the replication rule that Rucio creates for each file (e.g., "Analysis"),
+ which can affect RSE queue priorities.
+
+ _**Note:**_ If your files are uploaded into a dataset, the dataset’s replication
+ rule does not use this activity parameter.
+
+ Returns
+ -------
+ int
+ Status code (``0`` if all files were uploaded successfully).
+
+ Raises
+ ------
+ NoFilesUploaded
+ Raised if none of the requested files could be uploaded.
+ NotAllFilesUploaded
+ Raised if some files were successfully uploaded, but others failed.
+ RSEWriteBlocked
+ Raised if `ignore_availability=False` but the chosen RSE does not allow writing.
+ InputValidationError
+ Raised if mandatory fields are missing, if conflicting DIDs are found,
+ or if no valid files remain after input parsing.
+
+ Examples
+ --------
+ ??? Example
+
+ Upload a single local file to the *CERN-PROD* RSE and write a JSON summary to
+ ``upload_summary.json``:
+
+ ```python
+ from rucio.client.uploadclient import UploadClient
+ upload_client = UploadClient()
+ items = [
+ {"path": "/data/file1.txt",
+ "rse": "CERN-PROD", # target RSE
+ "did_scope": "user.alice", # optional; defaults to user.<account>
+ "did_name": "file1.txt"} # optional; defaults to basename
+ ]
+ upload_client.upload(items, summary_file_path="upload_summary.json")
+ ```
+
+ Recursively upload every file found under ``/data/dataset`` into a new
+ dataset ``user.alice:mydataset`` on a random RSE that matches the
+ expression ``tier=1``; collect per-file *trace* dictionaries for later
+ inspection:
+
+ ```python
+ traces: list[TraceBaseDict] = []
+ dir_item = {
+ "path": "/data/dataset",
+ "rse": "tier=1", # RSE expression; one will be chosen
+ "recursive": True,
+ "dataset_scope": "user.alice",
+ "dataset_name": "mydataset",
+ "dataset_meta": {"project": "demo"},
+ }
+ upload_client.upload([dir_item], traces_copy_out=traces)
+ ```
"""
# helper to get rse from rse_expression:
def _pick_random_rse(rse_expression: str) -> dict[str, Any]:
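The new docstring's `pfn` notes can be combined into a further usage sketch: uploading to a non-deterministic RSE with an explicit PFN while still registering the replica afterwards. The RSE name and PFN below are placeholders, not values taken from the diff:

```python
from rucio.client.uploadclient import UploadClient

upload_client = UploadClient()
item = {
    "path": "/data/file2.txt",
    "rse": "SITE_NONDET_DISK",  # placeholder: assumed to be a non-deterministic RSE
    "pfn": "davs://storage.example.org:443/store/user/alice/file2.txt",  # placeholder PFN
    "no_register": False,            # keep catalogue registration enabled ...
    "register_after_upload": True,   # ... but only register once the upload succeeded
}
upload_client.upload([item])
```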
@@ -198,7 +388,7 @@ class UploadClient:
delete_existing = False

trace = copy.deepcopy(self.trace)
- # appending trace to list reference
+ # appending trace to the list reference if the reference exists
if traces_copy_out is not None:
traces_copy_out.append(trace)

@@ -218,7 +408,8 @@ class UploadClient:
logger(logging.ERROR, 'PFN has to be defined for NON-DETERMINISTIC RSE.')
continue
if pfn and is_deterministic:
- logger(logging.WARNING,
+ logger(logging.WARNING,
+ 'Upload with given pfn implies that no_register is True, except non-deterministic RSEs')
no_register = True

# resolving local area networks
@@ -228,7 +419,7 @@ class UploadClient:
rse_attributes = self.client.list_rse_attributes(rse)
except:
logger(logging.WARNING, 'Attributes of the RSE: %s not available.' % rse)
- if
+ if self.client_location and 'lan' in rse_settings['domain'] and RseAttr.SITE in rse_attributes:
if self.client_location['site'] == rse_attributes[RseAttr.SITE]:
domain = 'lan'
logger(logging.DEBUG, '{} domain is used for the upload'.format(domain))
@@ -240,12 +431,22 @@ class UploadClient:
# impl = self.preferred_impl(rse_settings, domain)

if not no_register and not register_after_upload:
- self._register_file(file,
+ self._register_file(file,
+ registered_dataset_dids,
+ ignore_availability=ignore_availability,
+ activity=activity)

- # if register_after_upload, file should be overwritten if it is not registered
- # otherwise if file already exists on RSE we're done
+ # if register_after_upload, the file should be overwritten if it is not registered,
+ # otherwise if the file already exists on RSE we're done
if register_after_upload:
- if rsemgr.exists(rse_settings,
+ if rsemgr.exists(rse_settings,
+ pfn if pfn else file_did, # type: ignore (pfn is str)
+ domain=domain,
+ scheme=force_scheme,
+ impl=impl,
+ auth_token=self.auth_token,
+ vo=self.client.vo,
+ logger=logger):
try:
self.client.get_did(file['did_scope'], file['did_name'])
logger(logging.INFO, 'File already registered. Skipping upload.')
@@ -255,22 +456,48 @@ class UploadClient:
logger(logging.INFO, 'File already exists on RSE. Previous left overs will be overwritten.')
delete_existing = True
elif not is_deterministic and not no_register:
- if rsemgr.exists(rse_settings,
-
+ if rsemgr.exists(rse_settings,
+ pfn, # type: ignore (pfn is str)
+ domain=domain,
+ scheme=force_scheme,
+ impl=impl,
+ auth_token=self.auth_token,
+ vo=self.client.vo,
+ logger=logger):
+ logger(logging.INFO,
+ 'File already exists on RSE with given pfn. Skipping upload. Existing replica has to be removed first.')
trace['stateReason'] = 'File already exists'
continue
- elif rsemgr.exists(rse_settings,
+ elif rsemgr.exists(rse_settings,
+ file_did,
+ domain=domain,
+ scheme=force_scheme,
+ impl=impl,
+ auth_token=self.auth_token,
+ vo=self.client.vo,
+ logger=logger):
logger(logging.INFO, 'File already exists on RSE with different pfn. Skipping upload.')
trace['stateReason'] = 'File already exists'
continue
else:
- if rsemgr.exists(rse_settings,
+ if rsemgr.exists(rse_settings,
+ pfn if pfn else file_did, # type: ignore (pfn is str)
+ domain=domain,
+ scheme=force_scheme,
+ impl=impl,
+ auth_token=self.auth_token,
+ vo=self.client.vo,
+ logger=logger):
logger(logging.INFO, 'File already exists on RSE. Skipping upload')
trace['stateReason'] = 'File already exists'
continue

# protocol handling and upload
- protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings,
+ protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings,
+ operation='write',
+ scheme=force_scheme,
+ domain=domain,
+ impl=impl)
protocols.reverse()
success = False
state_reason = ''
@@ -279,11 +506,9 @@ class UploadClient:
protocol = protocols.pop()
cur_scheme = protocol['scheme']
logger(logging.INFO, 'Trying upload with %s to %s' % (cur_scheme, rse))
- lfn: "LFNDict" = {
-
-
- }
- lfn['filename'] = basename
+ lfn: "LFNDict" = {'name': file['did_name'],
+ 'scope': file['did_scope'],
+ 'filename': basename}

for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS:
if checksum_name in file:
@@ -313,7 +538,10 @@ class UploadClient:
logger(logging.DEBUG, 'Upload done.')
success = True
file['upload_result'] = {0: True, 1: None, 'success': True, 'pfn': pfn} # TODO: needs to be removed
- except (ServiceUnavailable,
+ except (ServiceUnavailable,
+ ResourceTemporaryUnavailable,
+ RSEOperationNotSupported,
+ RucioException) as error:
logger(logging.WARNING, 'Upload attempt failed')
logger(logging.INFO, 'Exception: %s' % str(error), exc_info=True)
state_reason = str(error)
@@ -331,7 +559,10 @@ class UploadClient:
registration_succeeded = True
if not no_register:
if register_after_upload:
- self._register_file(file,
+ self._register_file(file,
+ registered_dataset_dids,
+ ignore_availability=ignore_availability,
+ activity=activity)
else:
replica_for_api = self._convert_file_for_api(file)
try:
@@ -341,10 +572,13 @@ class UploadClient:
logger(logging.ERROR, 'Failed to update replica state for file {}'.format(basename))
logger(logging.DEBUG, 'Details: {}'.format(str(error)))

- # add file to dataset if needed
+ # add the file to dataset if needed
if dataset_did_str and not no_register:
try:
- self.client.attach_dids(
+ self.client.attach_dids(
+ file['dataset_scope'], # type: ignore (`dataset_scope` always exists if `dataset_did_str`)
+ file['dataset_name'], # type: ignore (`dataset_name` always exists if `dataset_did_str`)
+ [file_did])
except Exception as error:
registration_succeeded = False
logger(logging.ERROR, 'Failed to attach file to the dataset')
@@ -386,15 +620,31 @@ class UploadClient:
raise NotAllFilesUploaded()
return 0

- def _add_bittorrent_meta(
-
+ def _add_bittorrent_meta(
+ self,
+ file: "Mapping[str, Any]"
+ ) -> None:
+ """
+ Add BitTorrent v2 metadata to the file DID.
+
+ This method calculates the BitTorrent v2 pieces root, layers, and piece length for
+ the specified local file, and updates the file DID's metadata with these values.
+
+ Parameters
+ ----------
+ file
+ A dictionary that must include 'dirname', 'basename', 'did_scope',
+ and 'did_name', describing the file path and the associated DID.
+ """
+ pieces_root, pieces_layers, piece_length = bittorrent_v2_merkle_sha256(
+ os.path.join(file['dirname'], file['basename']))
bittorrent_meta = {
'bittorrent_pieces_root': base64.b64encode(pieces_root).decode(),
'bittorrent_pieces_layers': base64.b64encode(pieces_layers).decode(),
'bittorrent_piece_length': piece_length,
}
self.client.set_metadata_bulk(scope=file['did_scope'], name=file['did_name'], meta=bittorrent_meta)
- self.logger(logging.INFO, f"Added
+ self.logger(logging.INFO, f"Added BitTorrent metadata to file DID {file['did_scope']}:{file['did_name']}")

def _register_file(
self,
@@ -404,17 +654,33 @@ class UploadClient:
activity: Optional[str] = None
) -> None:
"""
-
-
-
-
-
-
-
-
-
-
-
+ Register a single file DID in Rucio, optionally creating its parent dataset if needed.
+
+ Ensures that a file is known in the Rucio catalog under the specified scope. If a
+ dataset is specified in `file` and it does not yet exist, the method creates it and
+ attaches the file to that dataset, applying replication rules as appropriate. If no
+ dataset is provided and the file DID does not yet exist in Rucio, the method creates
+ a replication rule for the newly added file. If the file DID already exists, no new
+ top-level rule is created (the file’s existing rules or attachments remain unchanged).
+ Checksums are compared to prevent conflicts if the file is already registered.
+
+ Parameters
+ ----------
+ file
+ A dictionary containing file information (e.g., 'did_scope', 'did_name', 'adler32', etc.).
+ registered_dataset_dids
+ A set of dataset DIDs already registered to avoid duplicates.
+ ignore_availability
+ If True, creates replication rules even when the RSE is marked unavailable.
+ activity
+ Specifies the transfer activity (e.g., 'User Subscriptions') for the replication rule.
+
+ Raises
+ ------
+ InputValidationError
+ If a dataset already exists, but the caller attempts to set a new lifetime for it.
+ DataIdentifierAlreadyExists
+ If the local checksum differs from the remote checksum.
"""
logger = self.logger
logger(logging.DEBUG, 'Registering file')
@@ -426,7 +692,8 @@ class UploadClient:
except ScopeNotFound:
pass
if account_scopes and file['did_scope'] not in account_scopes:
- logger(logging.WARNING,
+ logger(logging.WARNING,
+ 'Scope {} not found for the account {}.'.format(file['did_scope'], self.client.account))

rse = file['rse']
dataset_did_str = file.get('dataset_did_str')
@@ -447,8 +714,8 @@ class UploadClient:
except DataIdentifierAlreadyExists:
logger(logging.INFO, 'Dataset %s already exists - no rule will be created' % dataset_did_str)
if file.get('lifetime') is not None:
- raise InputValidationError(
-
+ raise InputValidationError(
+ 'Dataset %s exists and lifetime %s given. Prohibited to modify parent dataset lifetime.' % (dataset_did_str, file.get('lifetime')))
else:
logger(logging.DEBUG, 'Skipping dataset registration')

@@ -457,16 +724,17 @@ class UploadClient:
file_did = {'scope': file_scope, 'name': file_name}
replica_for_api = self._convert_file_for_api(file)
try:
- # if the remote checksum is different this
+ # if the remote checksum is different, this DID must not be used
meta = self.client.get_metadata(file_scope, file_name)
logger(logging.INFO, 'File DID already exists')
logger(logging.DEBUG, 'local checksum: %s, remote checksum: %s' % (file['adler32'], meta['adler32']))

if str(meta['adler32']).lstrip('0') != str(file['adler32']).lstrip('0'):
- logger(logging.ERROR,
+ logger(logging.ERROR,
+ 'Local checksum %s does not match remote checksum %s' % (file['adler32'], meta['adler32']))
raise DataIdentifierAlreadyExists

- # add file to rse if it is not registered yet
+ # add the file to rse if it is not registered yet
replicastate = list(self.client.list_replicas([file_did], all_states=True))
if rse not in replicastate[0]['rses']:
self.client.add_replicas(rse=rse, files=[replica_for_api])
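The checksum guard in `_register_file` above compares the Adler-32 digests only after stripping leading zeros, since the 8-character hex value may be stored with or without them. A minimal sketch of that comparison:

```python
def adler32_matches(local: str, remote: str) -> bool:
    # '0a1b2c3d' and 'a1b2c3d' describe the same checksum, so both sides
    # are normalised before comparing, as in _register_file above.
    return str(local).lstrip('0') == str(remote).lstrip('0')

assert adler32_matches("0a1b2c3d", "a1b2c3d")
assert not adler32_matches("0a1b2c3d", "0a1b2c3e")
```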
@@ -479,17 +747,51 @@ class UploadClient:
logger(logging.INFO, 'Successfully added replica in Rucio catalogue at %s' % rse)
if not dataset_did_str:
# only need to add rules for files if no dataset is given
- self.client.add_replication_rule([file_did],
+ self.client.add_replication_rule([file_did],
+ copies=1,
+ rse_expression=rse,
+ lifetime=file.get('lifetime'),
+ ignore_availability=ignore_availability,
+ activity=activity)
logger(logging.INFO, 'Successfully added replication rule at %s' % rse)

- def _get_file_guid(
+ def _get_file_guid(
+ self,
+ file: "Mapping[str, Any]"
+ ) -> str:
"""
-
-
+ Returns the unique identifier (GUID) for the given file.
+
+ If no GUID exists and the filename suggests a ROOT file, it extracts it with
+ `pool_extractFileIdentifier`. If a GUID exists, it is returned without dashes.
+ Otherwise, a new GUID is generated.
+
+ Parameters
+ ----------
+ file
+ A dictionary describing the file, expected to include:
+
+ * **`basename`**:
+ The base filename.

-
+ * **`path`**:
+ The path to the file.

-
+ * **`guid`** (optional):
+ A pre-assigned GUID string.
+
+ * **`no_register`** (optional):
+ If True, skip attempts to derive a GUID for ROOT files.
+
+ Returns
+ -------
+ str
+ A string containing the file's GUID, stripped of dashes and in lowercase.
+
+ Raises
+ ------
+ RucioException
+ If GUID extraction using the `pool_extractFileIdentifier` command fails.
"""
guid = file.get('guid')
if not guid and 'pool.root' in file['basename'].lower() and not file.get('no_register'):
@@ -514,14 +816,24 @@ class UploadClient:
item: "FileToUploadDict"
) -> "FileToUploadWithCollectedInfoDict":
"""
- Collects
-
-
-
-
-
-
-
+ Collects and returns essential file descriptors (e.g., size, checksums, GUID, etc.).
+
+ This method computes the file's size, calculates its Adler-32 and MD5 checksums,
+ and retrieves the file's GUID. These values, along with other existing fields from
+ the input dictionary, are returned in a new dictionary.
+
+ Parameters
+ ----------
+ filepath
+ The local filesystem path to the file.
+ item
+ A dictionary containing initial upload parameters (e.g., RSE name, scope) for the
+ file. Some of its fields may be updated or augmented in the returned dictionary.
+
+ Returns
+ -------
+ "FileToUploadWithCollectedInfoDict"
+ A new dictionary enriched with relevant file descriptors.
"""
new_item = copy.deepcopy(item)
new_item = cast("FileToUploadWithCollectedInfoDict", new_item)
@@ -541,17 +853,51 @@ class UploadClient:

return new_item

- def _collect_and_validate_file_info(
+ def _collect_and_validate_file_info(
+ self,
+ items: "Iterable[FileToUploadDict]"
+ ) -> list["FileToUploadWithCollectedInfoDict"]:
"""
-
-
-
-
-
-
-
-
-
+ Collect and verify local file info for upload, optionally registering folders as
+ datasets/containers.
+
+ This method iterates over the provided items, each describing a local path and
+ associated upload parameters, checks that each item has a valid path and RSE, and
+ computes basic file details such as size and checksums. If the item is a directory
+ and `recursive` is set, the method calls `_recursive` to traverse subdirectories,
+ creating or attaching them as Rucio datasets or containers.
+
+ Parameters
+ ----------
+ items
+ An iterable of dictionaries describing files or directories, where each dictionary
+ typically has:
+
+ * **`path`**:
+ Local file system path
+
+ * **`rse`**:
+ Name of the RSE destination
+
+ * **`pfn`** (optional):
+ Physical file name (PFN)
+
+ * **`impl`** (optional):
+ Protocol implementation
+
+ * **`recursive`** (optional):
+ Whether to traverse directories recursively
+
+ Returns
+ -------
+ list["FileToUploadWithCollectedInfoDict"]
+ A list of dictionaries enriched with file descriptors (size, checksums, etc.)
+ and ready for further upload processing.
+
+ Raises
+ ------
+ InputValidationError
+ If no valid files are found.
"""
logger = self.logger
files: list["FileToUploadWithCollectedInfoDict"] = []
@@ -583,7 +929,8 @@ class UploadClient:
if not len(fnames) and not len(subdirs):
logger(logging.WARNING, 'Skipping %s because it is empty.' % dname)
elif not len(fnames):
- logger(logging.WARNING,
+ logger(logging.WARNING,
+ 'Skipping %s because it has no files in it. Subdirectories are not supported.' % dname)
elif os.path.isdir(path) and recursive:
files.extend(cast("list[FileToUploadWithCollectedInfoDict]", self._recursive(item)))
elif os.path.isfile(path) and not recursive:
@@ -599,15 +946,27 @@ class UploadClient:

return files

- def _convert_file_for_api(
+ def _convert_file_for_api(
+ self,
+ file: "Mapping[str, Any]"
+ ) -> dict[str, Any]:
"""
-
-
-
-
-
-
-
+ Create a minimal dictionary of file attributes for the Rucio API.
+
+ This method extracts only the necessary fields from the provided file dictionary,
+ producing a new dictionary that is suitable for registering or updating
+ a file replica in Rucio.
+
+ Parameters
+ ----------
+ file
+ A dictionary describing a file, expected to include at least `did_scope`,
+ `did_name`, `bytes`, `adler32`, `md5`, `meta`, `state`, and optionally `pfn`.
+
+ Returns
+ -------
+ dict[str, Any]
+ A dictionary containing only the relevant file attributes for Rucio's REST API.
"""
replica = {}
replica['scope'] = file['did_scope']
@@ -637,27 +996,67 @@ class UploadClient:
sign_service: Optional[str] = None
) -> Optional[str]:
"""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ Perform the actual file transfer to an RSE using the appropriate protocol.
+
+ This method is used once all necessary file information is resolved (logical file
+ name, checksums, etc.). It creates and verifies the physical file name (PFN),
+ optionally removes or overwrites stale replicas, uploads the file (potentially via
+ a temporary PFN suffix), checks its size/checksum consistency, and finalizes it
+ under the expected PFN.
+
+ Parameters
+ ----------
+ rse_settings
+ Dictionary containing the RSE configuration.
+ rse_attributes
+ Additional attributes of the RSE (e.g. 'archive_timeout').
+ lfn
+ An optional dictionary describing the logical file (e.g., {'name': '1_rse_local_put.raw',
+ 'scope': 'user.jdoe', ..}). If the 'filename' key is present, it overrides 'name'
+ in determining the local file name to read from source_dir.
+ source_dir
+ Local source directory path where the file to be uploaded resides.
+ domain
+ Network domain for the upload, commonly 'wan' for wide-area networks.
+ impl
+ Name of the protocol implementation to be enforced (if any).
+ force_pfn
+ If provided, forces the use of this PFN for the file location on the storage
+ (use with care since it can lead to "dark" data).
+ force_scheme
+ If provided, forces the protocol scheme (e.g. 'davs', 'https') to be used.
+ transfer_timeout
+ Timeout (in seconds) for the transfer operation before it fails.
+ delete_existing
+ If True, removes any unregistered or stale file on the storage that matches this PFN.
+ sign_service
+ If set, requests a signed URL from the given service (e.g., gcs, s3, swift).
+
+ Returns
+ -------
+ Optional[str]
+ The final PFN (physical file name) of the successfully uploaded file, or None
+ if creation failed.
+
+ Raises
+ ------
+ FileReplicaAlreadyExists
+ If the target file already exists and overwrite is not allowed.
+ RSEOperationNotSupported
+ If storage-side operations (delete/rename/put) are not supported or fail.
+ RucioException
+ If renaming or other critical operations cannot be completed.
"""
+
logger = self.logger

# Construct protocol for write operation.
# IMPORTANT: All upload stat() checks are always done with the write_protocol EXCEPT for cloud resources (signed URL for write cannot be used for read)
- protocol_write = self._create_protocol(rse_settings,
+ protocol_write = self._create_protocol(rse_settings,
+ 'write',
+ force_scheme=force_scheme,
+ domain=domain,
+ impl=impl)

base_name = lfn.get('filename', lfn['name'])
name = lfn.get('name', base_name)
|
|
|
682
1081
|
|
|
683
1082
|
# Auth. mostly for object stores
|
|
684
1083
|
if sign_service:
|
|
685
|
-
protocol_read = self._create_protocol(rse_settings,
|
|
1084
|
+
protocol_read = self._create_protocol(rse_settings,
|
|
1085
|
+
'read',
|
|
1086
|
+
domain=domain,
|
|
1087
|
+
impl=impl)
|
|
686
1088
|
if pfn is not None:
|
|
687
1089
|
signed_read_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'read', pfn)
|
|
688
1090
|
pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'write', pfn)
|
|
689
1091
|
|
|
690
|
-
# Create a name of tmp file if renaming operation is supported
|
|
1092
|
+
# Create a name of tmp file if the renaming operation is supported
|
|
691
1093
|
pfn_tmp = cast("str", '%s.rucio.upload' % pfn if protocol_write.renaming else pfn)
|
|
692
1094
|
signed_read_pfn_tmp = '%s.rucio.upload' % signed_read_pfn if protocol_write.renaming else signed_read_pfn
|
|
693
1095
|
|
|
694
1096
|
# Either DID exists or not register_after_upload
|
|
695
1097
|
if protocol_write.overwrite is False and delete_existing is False:
|
|
696
1098
|
if sign_service:
|
|
697
|
-
# Construct protocol for read
|
|
1099
|
+
# Construct protocol for read-ONLY for cloud resources and get signed URL for GET
|
|
698
1100
|
if protocol_read.exists(signed_read_pfn):
|
|
699
|
-
raise FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn)) # wrong exception
|
|
1101
|
+
raise FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn)) # wrong exception?
|
|
700
1102
|
elif protocol_write.exists(pfn):
|
|
701
|
-
raise FileReplicaAlreadyExists(
|
|
1103
|
+
raise FileReplicaAlreadyExists(
|
|
1104
|
+
'File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn)) # wrong exception?
|
|
702
1105
|
|
|
703
1106
|
# Removing tmp from earlier attempts
|
|
704
|
-
if (not sign_service and protocol_write.exists(pfn_tmp)) or (
|
|
1107
|
+
if (not sign_service and protocol_write.exists(pfn_tmp)) or (
|
|
1108
|
+
sign_service and protocol_read.exists(signed_read_pfn_tmp)):
|
|
705
1109
|
logger(logging.DEBUG, 'Removing remains of previous upload attempts.')
|
|
706
1110
|
try:
|
|
707
1111
|
# Construct protocol for delete operation.
|
|
708
|
-
protocol_delete = self._create_protocol(rse_settings,
|
|
1112
|
+
protocol_delete = self._create_protocol(rse_settings,
|
|
1113
|
+
'delete',
|
|
1114
|
+
force_scheme=force_scheme,
|
|
1115
|
+
domain=domain,
|
|
1116
|
+
impl=impl)
|
|
709
1117
|
delete_pfn = '%s.rucio.upload' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0]
|
|
710
1118
|
if sign_service:
|
|
711
1119
|
delete_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'delete', delete_pfn)
|
|
712
1120
|
protocol_delete.delete(delete_pfn)
|
|
713
1121
|
protocol_delete.close()
|
|
714
1122
|
except Exception as error:
|
|
715
|
-
raise RSEOperationNotSupported(
|
|
1123
|
+
raise RSEOperationNotSupported(
|
|
1124
|
+
'Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(error)))
|
|
716
1125
|
|
|
717
1126
|
# Removing not registered files from earlier attempts
|
|
718
1127
|
if delete_existing:
|
|
719
1128
|
logger(logging.DEBUG, 'Removing not-registered remains of previous upload attempts.')
|
|
720
1129
|
try:
|
|
721
1130
|
# Construct protocol for delete operation.
|
|
722
|
-
protocol_delete = self._create_protocol(rse_settings,
|
|
1131
|
+
protocol_delete = self._create_protocol(rse_settings,
|
|
1132
|
+
'delete',
|
|
1133
|
+
force_scheme=force_scheme,
|
|
1134
|
+
domain=domain,
|
|
1135
|
+
impl=impl)
|
|
723
1136
|
delete_pfn = '%s' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0]
|
|
724
1137
|
if sign_service:
|
|
725
1138
|
delete_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'delete', delete_pfn)
|
|
@@ -730,7 +1143,14 @@ class UploadClient:
|
|
|
730
1143
|
|
|
731
1144
|
# Process the upload of the tmp file
|
|
732
1145
|
try:
|
|
733
|
-
retry(protocol_write.put,
|
|
1146
|
+
retry(protocol_write.put,
|
|
1147
|
+
base_name,
|
|
1148
|
+
pfn_tmp,
|
|
1149
|
+
source_dir,
|
|
1150
|
+
transfer_timeout=transfer_timeout)(
|
|
1151
|
+
mtries=2,
|
|
1152
|
+
logger=logger
|
|
1153
|
+
)
|
|
734
1154
|
logger(logging.INFO, 'Successful upload of temporary file. {}'.format(pfn_tmp))
|
|
735
1155
|
except Exception as error:
|
|
736
1156
|
raise RSEOperationNotSupported(str(error))
|
|
@@ -750,17 +1170,20 @@ class UploadClient:
|
|
|
750
1170
|
if ('filesize' in stats) and ('filesize' in lfn):
|
|
751
1171
|
self.logger(logging.DEBUG, 'Filesize: Expected=%s Found=%s' % (lfn['filesize'], stats['filesize']))
|
|
752
1172
|
if int(stats['filesize']) != int(lfn['filesize']):
|
|
753
|
-
raise RucioException(
|
|
1173
|
+
raise RucioException(
|
|
1174
|
+
'Filesize mismatch. Source: %s Destination: %s' % (lfn['filesize'], stats['filesize']))
|
|
754
1175
|
if rse_settings['verify_checksum'] is not False:
|
|
755
1176
|
if ('adler32' in stats) and ('adler32' in lfn):
|
|
756
|
-
self.logger(logging.DEBUG,
|
|
1177
|
+
self.logger(logging.DEBUG,
|
|
1178
|
+
'Checksum: Expected=%s Found=%s' % (lfn['adler32'], stats['adler32']))
|
|
757
1179
|
if str(stats['adler32']).lstrip('0') != str(lfn['adler32']).lstrip('0'):
|
|
758
|
-
raise RucioException(
|
|
1180
|
+
raise RucioException(
|
|
1181
|
+
'Checksum mismatch. Source: %s Destination: %s' % (lfn['adler32'], stats['adler32']))
|
|
759
1182
|
|
|
760
1183
|
except Exception as error:
|
|
761
1184
|
raise error
|
|
762
1185
|
|
|
763
|
-
# The upload finished
|
|
1186
|
+
# The upload finished successfully and the file can be renamed
|
|
764
1187
|
try:
|
|
765
1188
|
if protocol_write.renaming:
|
|
766
1189
|
logger(logging.DEBUG, 'Renaming file %s to %s' % (pfn_tmp, pfn))
|
|
@@ -778,9 +1201,31 @@ class UploadClient:
|
|
|
778
1201
|
pfn: str
|
|
779
1202
|
) -> dict[str, Any]:
|
|
780
1203
|
"""
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
1204
|
+
Attempt to retrieve file statistics with exponential backoff.
|
|
1205
|
+
|
|
1206
|
+
This method invokes `protocol.stat` a limited number of times, waiting with an
|
|
1207
|
+
exponential backoff between each attempt when an error occurs. After the configured
|
|
1208
|
+
number of retries, the method performs one final `stat` call and returns its result
|
|
1209
|
+
or lets any resulting exception propagate.
|
|
1210
|
+
|
|
1211
|
+
Parameters
|
|
1212
|
+
----------
|
|
1213
|
+
protocol
|
|
1214
|
+
The RSEProtocol instance to use for retrieving file statistics
|
|
1215
|
+
pfn
|
|
1216
|
+
The physical file name (PFN) to be checked.
|
|
1217
|
+
|
|
1218
|
+
Returns
|
|
1219
|
+
-------
|
|
1220
|
+
dict[str, Any]
|
|
1221
|
+
A dictionary expected to include the filesize and adler32 for the provided pfn.
|
|
1222
|
+
|
|
1223
|
+
Raises
|
|
1224
|
+
------
|
|
1225
|
+
RSEChecksumUnavailable
|
|
1226
|
+
If the protocol indicates a missing checksum for the file.
|
|
1227
|
+
Exception
|
|
1228
|
+
If the requested service is not available or permissions are not granted.
|
|
784
1229
|
"""
|
|
785
1230
|
retries = config_get_int('client', 'protocol_stat_retries', raise_exception=False, default=6)
|
|
786
1231
|
for attempt in range(retries):
|
|
@@ -800,8 +1245,8 @@ class UploadClient:
|
|
|
800
1245
|
fail_str = ['The requested service is not available at the moment', 'Permission refused']
|
|
801
1246
|
if any(x in str(error) for x in fail_str):
|
|
802
1247
|
raise error
|
|
803
|
-
self.logger(logging.DEBUG, 'stat: unknown edge case, retrying in %ss' % 2**attempt)
|
|
804
|
-
time.sleep(2**attempt)
|
|
1248
|
+
self.logger(logging.DEBUG, 'stat: unknown edge case, retrying in %ss' % 2 ** attempt)
|
|
1249
|
+
time.sleep(2 ** attempt)
|
|
805
1250
|
return protocol.stat(pfn)
|
|
806
1251
|
|
|
807
1252
|
def _create_protocol(
|
|
@@ -813,14 +1258,44 @@ class UploadClient:
|
|
|
813
1258
|
domain: str = 'wan'
|
|
814
1259
|
) -> "RSEProtocol":
|
|
815
1260
|
"""
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
1261
|
+
Creates and returns the protocol object for the requested RSE operation.
|
|
1262
|
+
|
|
1263
|
+
Establishes a connection using the specified parameters (scheme, domain, etc.)
|
|
1264
|
+
and returns a protocol instance capable of handling the requested operation.
|
|
1265
|
+
|
|
1266
|
+
Parameters
|
|
1267
|
+
----------
|
|
1268
|
+
rse_settings
|
|
1269
|
+
The dictionary containing RSE configuration.
|
|
1270
|
+
operation
|
|
1271
|
+
The intended operation, such as 'read', 'write', or 'delete'.
|
|
1272
|
+
impl
|
|
1273
|
+
An optional override for the default protocol implementation.
|
|
1274
|
+
force_scheme
|
|
1275
|
+
If provided, forces the protocol to use this scheme.
|
|
1276
|
+
domain
|
|
1277
|
+
The network domain to be used, defaulting to 'wan'.
|
|
1278
|
+
|
|
1279
|
+
Returns
|
|
1280
|
+
-------
|
|
1281
|
+
"RSEProtocol"
|
|
1282
|
+
The instantiated `RSEProtocol` object.
|
|
1283
|
+
|
|
1284
|
+
Raises
|
|
1285
|
+
------
|
|
1286
|
+
Exception
|
|
1287
|
+
If the protocol creation or connection attempt fails.
|
|
821
1288
|
"""
|
|
822
1289
|
try:
|
|
823
|
-
protocol = rsemgr.create_protocol(
|
|
1290
|
+
protocol = rsemgr.create_protocol(
|
|
1291
|
+
rse_settings,
|
|
1292
|
+
operation,
|
|
1293
|
+
scheme=force_scheme,
|
|
1294
|
+
domain=domain,
|
|
1295
|
+
impl=impl,
|
|
1296
|
+
auth_token=self.auth_token,
|
|
1297
|
+
logger=self.logger
|
|
1298
|
+
)
|
|
824
1299
|
protocol.connect()
|
|
825
1300
|
except Exception as error:
|
|
826
1301
|
self.logger(logging.WARNING, 'Failed to create protocol for operation: %s' % operation)
|
|
@@ -828,24 +1303,69 @@ class UploadClient:
|
|
|
828
1303
|
raise error
|
|
829
1304
|
return protocol
|
|
830
1305
|
|
|
831
|
-
def _send_trace(
|
|
1306
|
+
def _send_trace(
|
|
1307
|
+
self,
|
|
1308
|
+
trace: "TraceDict"
|
|
1309
|
+
) -> None:
|
|
832
1310
|
"""
|
|
833
|
-
|
|
1311
|
+
Sends the trace if tracing is enabled.
|
|
834
1312
|
|
|
835
|
-
|
|
1313
|
+
If `self.tracing` is True, this method uses Rucio's `send_trace` function to
|
|
1314
|
+
dispatch the provided trace object to Rucio host. Otherwise, it takes no action.
|
|
1315
|
+
|
|
1316
|
+
Parameters
|
|
1317
|
+
----------
|
|
1318
|
+
trace
|
|
1319
|
+
The trace object to be sent.
|
|
836
1320
|
"""
|
|
837
1321
|
if self.tracing:
|
|
838
1322
|
send_trace(trace, self.client.trace_host, self.client.user_agent)
|
|
839
1323
|
|
|
840
|
-
def _recursive(
|
|
1324
|
+
def _recursive(
|
|
1325
|
+
self,
|
|
1326
|
+
item: "FileToUploadDict"
|
|
1327
|
+
) -> list["FileToUploadWithCollectedAndDatasetInfoDict"]:
|
|
841
1328
|
"""
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
1329
|
+
Recursively inspects a folder and creates corresponding Rucio datasets or containers.
|
|
1330
|
+
|
|
1331
|
+
This method traverses the local path specified in the given dictionary `item` and
|
|
1332
|
+
interprets subfolders as either Rucio containers (if they themselves contain further
|
|
1333
|
+
subfolders) or datasets (if they only contain files). Files within these datasets
|
|
1334
|
+
are gathered into a list with additional upload information. The method also attempts
|
|
1335
|
+
to create and attach these datasets/containers in Rucio, replicating the folder
|
|
1336
|
+
structure.
|
|
1337
|
+
|
|
1338
|
+
Note:
|
|
1339
|
+
------
|
|
1340
|
+
Currently, this method does not allow the top-level directory to contain both files
|
|
1341
|
+
and subdirectories.
|
|
1342
|
+
|
|
1343
|
+
Parameters
|
|
1344
|
+
----------
|
|
1345
|
+
item
|
|
1346
|
+
A dictionary describing the local path and upload parameters.
|
|
1347
|
+
It must contain at least:
|
|
1348
|
+
|
|
1349
|
+
* **`rse`**:
|
|
1350
|
+
The target RSE for the upload.
|
|
1351
|
+
|
|
1352
|
+
* **`path`**:
|
|
1353
|
+
The local directory path to inspect.
|
|
1354
|
+
|
|
1355
|
+
* **`did_scope`** (optional):
|
|
1356
|
+
Custom scope for the resulting datasets/containers.
|
|
1357
|
+
|
|
1358
|
+
Returns
|
|
1359
|
+
-------
|
|
1360
|
+
list["FileToUploadWithCollectedAndDatasetInfoDict"]
|
|
1361
|
+
A list of file descriptors enriched with collected file information, each
|
|
1362
|
+
conforming to FileToUploadWithCollectedAndDatasetInfoDict.
|
|
1363
|
+
|
|
1364
|
+
Raises
|
|
1365
|
+
------
|
|
1366
|
+
InputValidationError
|
|
1367
|
+
If a folder contains both files and subdirectories at its top level (invalid
|
|
1368
|
+
container/dataset structure).
|
|
849
1369
|
"""
|
|
850
1370
|
files: list["FileToUploadWithCollectedAndDatasetInfoDict"] = []
|
|
851
1371
|
datasets: list["DatasetDict"] = []
|
|
@@ -878,7 +1398,8 @@ class UploadClient:
|
|
|
878
1398
|
elif len(dirs) > 0:
|
|
879
1399
|
containers.append({'scope': scope, 'name': root.split('/')[-1]})
|
|
880
1400
|
self.logger(logging.DEBUG, 'Appended container with DID %s:%s' % (scope, path))
|
|
881
|
-
attach.extend([{'scope': scope, 'name': root.split('/')[-1], 'rse': rse,
|
|
1401
|
+
attach.extend([{'scope': scope, 'name': root.split('/')[-1], 'rse': rse,
|
|
1402
|
+
'did': {'scope': scope, 'name': dir_}} for dir_ in dirs])
|
|
882
1403
|
elif len(dirs) == 0 and len(fnames) == 0:
|
|
883
1404
|
self.logger(logging.WARNING, 'The folder %s is empty, skipping' % root)
|
|
884
1405
|
continue
|
|
@@ -904,7 +1425,8 @@ class UploadClient:
|
|
|
904
1425
|
self.logger(logging.INFO, 'DIDs attached to collection %s:%s' % (att['scope'], att['name']))
|
|
905
1426
|
except RucioException as error:
|
|
906
1427
|
self.logger(logging.ERROR, error)
|
|
907
|
-
self.logger(logging.ERROR,
|
|
1428
|
+
self.logger(logging.ERROR,
|
|
1429
|
+
'It was not possible to attach to collection with DID %s:%s' % (att['scope'], att['name']))
|
|
908
1430
|
return files
|
|
909
1431
|
|
|
910
1432
|
def preferred_impl(
|
|
@@ -913,13 +1435,29 @@ class UploadClient:
|
|
|
913
1435
|
domain: str
|
|
914
1436
|
) -> Optional[str]:
|
|
915
1437
|
"""
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
1438
|
+
Select a suitable protocol implementation for read, write, and delete operations on
|
|
1439
|
+
the given RSE and domain.
|
|
1440
|
+
|
|
1441
|
+
This method checks the local client configuration (under the `[upload] preferred_impl`
|
|
1442
|
+
setting) and compares it against the list of protocols declared in `rse_settings`.
|
|
1443
|
+
It attempts to find a protocol that supports the required I/O operations (read,
|
|
1444
|
+
write, delete) in the specified domain. If multiple preferred protocols are listed
|
|
1445
|
+
in the config, it iterates in order and returns the first viable match.
|
|
1446
|
+
|
|
1447
|
+
Parameters
|
|
1448
|
+
----------
|
|
1449
|
+
rse_settings
|
|
1450
|
+
A dictionary describing RSE details, including available protocols and their
|
|
1451
|
+
domains.
|
|
1452
|
+
domain
|
|
1453
|
+
The network domain (e.g., 'lan' or 'wan') in which the protocol must support
|
|
1454
|
+
all operations.
|
|
1455
|
+
|
|
1456
|
+
Returns
|
|
1457
|
+
-------
|
|
1458
|
+
Optional[str]
|
|
1459
|
+
The name of a protocol implementation that can handle read/write/delete
|
|
1460
|
+
for the specified domain, or None if no suitable protocol was found.
|
|
923
1461
|
"""
|
|
924
1462
|
preferred_protocols = []
|
|
925
1463
|
supported_impl = None
|
|
@@ -941,28 +1479,34 @@ class UploadClient:
|
|
|
941
1479
|
preferred_impls[i] = 'rucio.rse.protocols.' + impl
|
|
942
1480
|
i += 1
|
|
943
1481
|
|
|
944
|
-
preferred_protocols = [protocol for protocol in reversed(rse_settings['protocols']) if
|
|
1482
|
+
preferred_protocols = [protocol for protocol in reversed(rse_settings['protocols']) if
|
|
1483
|
+
protocol['impl'] in preferred_impls]
|
|
945
1484
|
|
|
946
1485
|
if len(preferred_protocols) > 0:
|
|
947
|
-
preferred_protocols += [protocol for protocol in reversed(rse_settings['protocols']) if
|
|
1486
|
+
preferred_protocols += [protocol for protocol in reversed(rse_settings['protocols']) if
|
|
1487
|
+
protocol not in preferred_protocols]
|
|
948
1488
|
else:
|
|
949
1489
|
preferred_protocols = reversed(rse_settings['protocols'])
|
|
950
1490
|
|
|
951
1491
|
for protocol in preferred_protocols:
|
|
952
1492
|
if domain not in list(protocol['domains'].keys()):
|
|
953
|
-
self.logger(logging.DEBUG,
|
|
1493
|
+
self.logger(logging.DEBUG,
|
|
1494
|
+
'Unsuitable protocol "%s": Domain %s not supported' % (protocol['impl'], domain))
|
|
954
1495
|
continue
|
|
955
1496
|
if not all(operations in protocol['domains'][domain] for operations in ("read", "write", "delete")):
|
|
956
|
-
self.logger(logging.DEBUG,
|
|
1497
|
+
self.logger(logging.DEBUG,
|
|
1498
|
+
'Unsuitable protocol "%s": All operations are not supported' % (protocol['impl']))
|
|
957
1499
|
continue
|
|
958
1500
|
try:
|
|
959
|
-
supported_protocol = rsemgr.create_protocol(rse_settings, 'write', domain=domain, impl=protocol['impl'],
|
|
1501
|
+
supported_protocol = rsemgr.create_protocol(rse_settings, 'write', domain=domain, impl=protocol['impl'],
|
|
1502
|
+
auth_token=self.auth_token, logger=self.logger)
|
|
960
1503
|
supported_protocol.connect()
|
|
961
1504
|
except Exception as error:
|
|
962
1505
|
self.logger(logging.DEBUG, 'Failed to create protocol "%s", exception: %s' % (protocol['impl'], error))
|
|
963
1506
|
pass
|
|
964
1507
|
else:
|
|
965
|
-
self.logger(logging.INFO,
|
|
1508
|
+
self.logger(logging.INFO,
|
|
1509
|
+
'Preferred protocol impl supported locally and remotely: %s' % (protocol['impl']))
|
|
966
1510
|
supported_impl = protocol['impl']
|
|
967
1511
|
break
|
|
968
1512
|
|
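The filtering that `preferred_impl` performs on the RSE's protocol list can be summarised as: keep a protocol only if it declares the requested domain and supports read, write and delete there. A minimal sketch of that check over the `protocols` entries of an `rse_settings` dictionary:

```python
def first_usable_impl(protocols, domain):
    # Mirrors the domain/operation filtering in preferred_impl above;
    # connection probing and the config-driven ordering are left out.
    for protocol in protocols:
        domains = protocol.get('domains', {})
        if domain not in domains:
            continue
        if not all(op in domains[domain] for op in ('read', 'write', 'delete')):
            continue
        return protocol['impl']
    return None
```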