ctao-bdms-clients 0.1.0rc3__py3-none-any.whl → 0.2.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bdms/_version.py +9 -4
- bdms/acada_ingestion.py +435 -0
- bdms/tests/conftest.py +41 -0
- bdms/tests/test_acada_ingestion.py +501 -0
- bdms/tests/test_basic_rucio_functionality.py +15 -27
- bdms/tests/test_onsite_storage.py +119 -0
- bdms/tests/utils.py +112 -0
- {ctao_bdms_clients-0.1.0rc3.dist-info → ctao_bdms_clients-0.2.0rc1.dist-info}/METADATA +9 -4
- ctao_bdms_clients-0.2.0rc1.dist-info/RECORD +18 -0
- {ctao_bdms_clients-0.1.0rc3.dist-info → ctao_bdms_clients-0.2.0rc1.dist-info}/WHEEL +1 -1
- bdms/_dev_version/__init__.py +0 -9
- ctao_bdms_clients-0.1.0rc3.dist-info/RECORD +0 -15
- {ctao_bdms_clients-0.1.0rc3.dist-info → ctao_bdms_clients-0.2.0rc1.dist-info/licenses}/LICENSE +0 -0
- {ctao_bdms_clients-0.1.0rc3.dist-info → ctao_bdms_clients-0.2.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,501 @@
|
|
1
|
+
"""Tests for onsite ingestion and replication into the BDMS system using the IngestionClient.
|
2
|
+
|
3
|
+
This module contains tests for the IngestionClient class, focusing on the conversion of ACADA paths to Logical File Names (LFNs), the registration of replicas in Rucio,
|
4
|
+
and the replication of data between Rucio storage elements (RSEs).
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
import os
|
9
|
+
import subprocess
|
10
|
+
from datetime import datetime
|
11
|
+
from pathlib import Path
|
12
|
+
from secrets import token_hex
|
13
|
+
|
14
|
+
import pytest
|
15
|
+
from astropy.io import fits
|
16
|
+
from astropy.table import Table
|
17
|
+
from rucio.client import Client
|
18
|
+
from rucio.client.downloadclient import DownloadClient
|
19
|
+
from rucio.client.replicaclient import ReplicaClient
|
20
|
+
from rucio.client.ruleclient import RuleClient
|
21
|
+
from rucio.common.exception import RucioException
|
22
|
+
from rucio.common.utils import adler32
|
23
|
+
|
24
|
+
from bdms.acada_ingestion import IngestionClient
|
25
|
+
from bdms.tests.utils import wait_for_replication_status
|
26
|
+
|
27
|
+
# Module-level logger used by the helpers and tests in this file.
LOGGER = logging.getLogger(__name__)

# Numeric user and group id handed to os.chown for everything the tests
# create on the shared storage, so that xrootd can read and write there.
XROOTD_UID = 994
XROOTD_GID = 994

# Names of the Rucio storage elements (RSEs) the tests work with.
ONSITE_RSE = "STORAGE-1"
OFFSITE_RSE_1 = "STORAGE-2"
OFFSITE_RSE_2 = "STORAGE-3"
|
34
|
+
|
35
|
+
|
36
|
+
def test_shared_storage(storage_mount_path: Path):
    """Check that the shared storage mount point exists on the client."""
    is_available = storage_mount_path.exists()
    assert (
        is_available
    ), f"Shared storage {storage_mount_path} is not available on the client"
|
42
|
+
|
43
|
+
|
44
|
+
def recursive_chown(path: Path, uid: int, gid: int):
    """Equivalent of unix ``chown -R <uid>:<gid> <path>``.

    Changes owner and group of ``path`` itself and of every directory and
    file that ``os.walk`` finds below it.

    Parameters
    ----------
    path : Path
        Root of the tree whose ownership is changed.
    uid : int
        Numeric user id passed to ``os.chown``.
    gid : int
        Numeric group id passed to ``os.chown``.

    Raises
    ------
    OSError
        If ``os.chown`` fails, e.g. ``FileNotFoundError`` when ``path`` does
        not exist.
    """
    # ``chown -R`` also changes the starting point. os.walk only reports the
    # entries below each directory, so the root is handled explicitly.
    os.chown(path, uid, gid)

    for root, dirs, files in os.walk(path):
        root = Path(root)
        for name in (*dirs, *files):
            os.chown(root / name, uid, gid)
|
52
|
+
|
53
|
+
|
54
|
+
def trigger_judge_repairer() -> None:
    """Trigger the rucio-judge-repairer daemon to run once and fix any STUCK rules.

    Runs ``/usr/local/bin/rucio-judge-repairer --run-once`` inside
    ``deployment/bdms-judge-evaluator`` through ``kubectl exec``. A ``kubectl``
    binary in the working directory is preferred; if there is none, the one
    found on the ``PATH`` is used.

    Raises
    ------
    RuntimeError
        If no kubectl executable can be found.
    subprocess.CalledProcessError
        If the kubectl command exits with a non-zero status.
    """
    import shutil

    # Prefer the local binary and fall back to PATH. If neither exists, keep
    # the local name so subprocess raises FileNotFoundError, handled below.
    kubectl = "./kubectl"
    if not Path(kubectl).is_file():
        kubectl = shutil.which("kubectl") or kubectl

    cmd = [
        kubectl,
        "exec",
        "deployment/bdms-judge-evaluator",
        "--",
        "/usr/local/bin/rucio-judge-repairer",
        "--run-once",
    ]

    try:
        result = subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
        )
    except FileNotFoundError as e:
        LOGGER.error("kubectl command not found: %s", e)
        raise RuntimeError(
            "kubectl command not found. Ensure kubectl is in the PATH or working directory."
        ) from e
    except subprocess.CalledProcessError as e:
        LOGGER.error("Failed to trigger rucio-judge-repairer daemon: %s", e.stderr)
        raise

    LOGGER.info("Triggered rucio-judge-repairer daemon: %s", result.stdout)
|
81
|
+
|
82
|
+
|
83
|
+
@pytest.fixture
def test_file(
    storage_mount_path: Path, test_scope: str, test_vo: str
) -> tuple[Path, str]:
    """Write a small dummy ``.fits.fz`` file to the shared storage.

    Returns the path of the created file and the text that was written to it.
    """
    unique_id = f"{datetime.now():%Y%m%d_%H%M%S}_{token_hex(8)}"
    content = f"FITS-like content for {unique_id}"

    vo_dir = storage_mount_path / test_vo
    target = vo_dir / test_scope / f"testfile_{unique_id}.fits.fz"
    target.parent.mkdir(parents=True, exist_ok=True)

    # Hand the created directories over to the xrootd user so that
    # xrootd can still read and write there.
    recursive_chown(vo_dir, XROOTD_UID, XROOTD_GID)

    # Not a real FITS file: a short unique text payload is enough here.
    target.write_text(content)
    os.chown(target, XROOTD_UID, XROOTD_GID)

    return target, content
|
105
|
+
|
106
|
+
|
107
|
+
def test_acada_to_lfn(storage_mount_path: Path, test_vo: str):
    """Check IngestionClient.acada_to_lfn for one valid path and several rejected ones."""
    client = IngestionClient(storage_mount_path, ONSITE_RSE, vo=test_vo)
    data_path, vo, scope = client.data_path, client.vo, client.scope

    # Case 1: a path below <data_path>/<vo>/<scope> maps to /<vo>/<scope>/...
    suffix = "DL0/LSTN-01/events/2023/10/13/Subarray_SWAT_sbid008_obid00081_0.fits.fz"
    valid_path = f"{data_path}/{vo}/{scope}/{suffix}"
    expected_lfn = f"/{vo}/{scope}/{suffix}"

    lfn = client.acada_to_lfn(acada_path=valid_path)
    assert lfn == expected_lfn, f"Expected {expected_lfn}, got {lfn}"

    # Cases 2-6: inputs that must be rejected, each with the expected error text
    rejected = [
        # empty string is not an absolute path
        ("", "acada_path must be absolute"),
        # relative path
        ("./test.fits", "acada_path must be absolute"),
        # absolute, but outside of data_path
        ("/invalid/path/file.fits.fz", "is not within data_path"),
        # inside data_path, but wrong <vo>/<scope> prefix
        (
            f"{data_path}/wrong_vo/wrong_scope/DL0/LSTN-01/file.fits.fz",
            "must start with",
        ),
        # correct <vo>, wrong <scope>
        (
            f"{data_path}/{vo}/wrong_scope/DL0/LSTN-01/file.fits.fz",
            "must start with",
        ),
    ]
    for bad_path, error_pattern in rejected:
        with pytest.raises(ValueError, match=error_pattern):
            client.acada_to_lfn(acada_path=bad_path)
|
150
|
+
|
151
|
+
|
152
|
+
@pytest.mark.usefixtures("_auth_proxy")
def test_check_replica_exists(
    storage_mount_path: Path, test_scope: str, test_file: tuple[Path, str], test_vo: str
):
    """Check IngestionClient.check_replica_exists before and after registration."""
    client = IngestionClient(
        storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
    )
    acada_path = test_file[0]
    lfn = client.acada_to_lfn(acada_path)

    # Case 1: nothing has been registered for this LFN yet
    exists_before = client.check_replica_exists(lfn)
    assert not exists_before, f"Expected no replica for LFN {lfn} before registration"

    # Case 2: once the replica is registered in Rucio it must be found
    client.add_onsite_replica(acada_path)
    exists_after = client.check_replica_exists(lfn)
    assert exists_after, f"Expected replica to exist for LFN {lfn} after registration"

    # Case 3: an LFN that was never registered
    nonexistent_lfn = lfn + ".nonexistent"
    exists_unknown = client.check_replica_exists(nonexistent_lfn)
    assert (
        not exists_unknown
    ), f"Expected no replica for nonexistent LFN {nonexistent_lfn}"
|
182
|
+
|
183
|
+
|
184
|
+
@pytest.mark.usefixtures("_auth_proxy")
@pytest.mark.verifies_usecase("UC-110-1.1.1")
def test_add_onsite_replica_with_dummy_file(
    test_file: tuple[Path, str],
    test_scope: str,
    tmp_path: Path,
    storage_mount_path,
    test_vo: str,
    caplog,
):
    """Register a dummy file with add_onsite_replica, download it and compare the content."""
    client = IngestionClient(
        storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
    )
    acada_path, expected_content = test_file

    # Register the replica; the returned LFN must match acada_to_lfn
    lfn = client.add_onsite_replica(acada_path=acada_path)
    expected_lfn = client.acada_to_lfn(acada_path)
    assert lfn == expected_lfn, f"Expected LFN {expected_lfn}, got {lfn}"

    # Fetch the file back through Rucio using the LFN
    DownloadClient().download_dids(
        [
            {
                "did": f"{client.scope}:{lfn}",
                "base_dir": str(tmp_path),
                "no_subdir": True,
            }
        ]
    )

    download_path = tmp_path / lfn.lstrip("/")
    assert download_path.is_file(), f"Download failed at {download_path}"

    downloaded_content = download_path.read_text()
    mismatch_msg = (
        "Downloaded file content does not match the original. "
        f"Expected: {expected_content}, Got: {downloaded_content}"
    )
    assert downloaded_content == expected_content, mismatch_msg

    # Ingesting the same file a second time must be skipped and logged
    caplog.clear()
    lfn = client.add_onsite_replica(acada_path=acada_path)
    logged_messages = [record.message for record in caplog.records]
    assert f"Replica already exists for lfn '{lfn}', skipping" in logged_messages
|
233
|
+
|
234
|
+
|
235
|
+
def test_rses():
    """Check that the onsite RSE and both offsite RSEs are listed by Rucio."""
    rses = [entry["rse"] for entry in Client().list_rses()]

    for expected_rse in (ONSITE_RSE, OFFSITE_RSE_1, OFFSITE_RSE_2):
        assert expected_rse in rses, f"Expected RSE {expected_rse} not found in {rses}"
|
244
|
+
|
245
|
+
|
246
|
+
@pytest.fixture
def pre_existing_lfn(
    test_file: tuple[Path, str],
    test_scope: str,
    test_vo: str,
) -> str:
    """Register the test file on the onsite RSE via ReplicaClient and return its LFN.

    The replica is registered without going through IngestionClient.
    """
    acada_path = test_file[0]

    # Build the LFN by hand: the part of the path after "<vo>/<scope>/",
    # re-rooted at "/<vo>/<scope>/"
    prefix = f"{test_vo}/{test_scope}/"
    relative_path = str(acada_path).split(prefix, 1)[-1]
    lfn = f"/{prefix}{relative_path}"
    checksum = adler32(acada_path)

    replica_client = ReplicaClient()
    replica = {
        "scope": test_scope,
        "name": lfn,
        "bytes": acada_path.stat().st_size,  # file size
        "adler32": checksum,
    }

    # Register the replica directly, logging the failure before re-raising
    try:
        replica_client.add_replicas(rse=ONSITE_RSE, files=[replica])
    except RucioException as e:
        LOGGER.error(
            "Failed to pre-register replica for LFN %s on %s: %s",
            lfn,
            ONSITE_RSE,
            str(e),
        )
        raise

    # Make sure Rucio now reports the replica
    did = {"scope": test_scope, "name": lfn}
    registered = list(replica_client.list_replicas(dids=[did]))
    assert (
        registered
    ), f"Failed to verify pre-registration of replica for LFN {lfn} on {ONSITE_RSE}"

    return lfn
|
289
|
+
|
290
|
+
|
291
|
+
@pytest.mark.usefixtures("_auth_proxy")
@pytest.mark.verifies_usecase("UC-110-1.6")
def test_add_offsite_replication_rules(
    pre_existing_lfn: str,
    test_scope: str,
    test_vo: str,
    storage_mount_path: Path,
    tmp_path: Path,
    test_file: tuple[Path, str],
    caplog,
):
    """Test the add_offsite_replication_rules method of IngestionClient.

    Requests two offsite copies of a replica that is already registered on
    the onsite RSE, waits for both rules to reach ``OK``, then checks the
    replica states on all three RSEs and the content of the copy downloaded
    from ``OFFSITE_RSE_2``.
    """
    ingestion_client = IngestionClient(
        storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
    )
    caplog.set_level(logging.DEBUG)

    # Replicate the ACADA file to two offsite RSEs
    lfn = pre_existing_lfn
    did = {"scope": test_scope, "name": lfn}

    _, test_file_content = test_file  # content the downloaded copy must match

    offsite_rse_expression = "OFFSITE"
    copies = 2
    rule_ids = ingestion_client.add_offsite_replication_rules(
        lfn=lfn,
        offsite_rse_expression=offsite_rse_expression,
        copies=copies,
        lifetime=None,
    )

    # Fail with a clear message instead of an IndexError when indexing below
    assert (
        len(rule_ids) == 2
    ), f"Expected exactly 2 rule IDs, got {len(rule_ids)}: {rule_ids}"
    rule_id_offsite_1 = rule_ids[0]
    rule_id_offsite_2 = rule_ids[1]
    rule_client = RuleClient()
    replica_client = ReplicaClient()

    # Wait for the first offsite rule to complete
    wait_for_replication_status(rule_client, rule_id_offsite_1, expected_status="OK")

    # After the first rule a replica must exist on one of the offsite RSEs
    replicas = next(replica_client.list_replicas(dids=[did]))
    states = replicas.get("states", {})
    assert (
        states.get(OFFSITE_RSE_1) == "AVAILABLE"
        or states.get(OFFSITE_RSE_2) == "AVAILABLE"
    ), f"Expected replica on either {OFFSITE_RSE_1} or {OFFSITE_RSE_2} to be AVAILABLE after first rule: {states}"

    # Manually trigger the judge-repairer to ensure the second rule doesn't get stuck
    trigger_judge_repairer()

    # Wait for the second offsite rule to complete
    wait_for_replication_status(rule_client, rule_id_offsite_2, expected_status="OK")

    # Verify the replica exists on all RSEs
    replicas = next(replica_client.list_replicas(dids=[did]))
    states = replicas.get("states", {})
    LOGGER.info(
        "Replica states for DID %s in test_add_offsite_replication_rules: %s",
        did,
        states,
    )
    for rse in (ONSITE_RSE, OFFSITE_RSE_1, OFFSITE_RSE_2):
        assert (
            states.get(rse) == "AVAILABLE"
        ), f"Expected replica on {rse} to be AVAILABLE: {states}"

    # Download the file from OFFSITE_RSE_2 to verify its content
    download_spec = {
        "did": f"{test_scope}:{lfn}",
        "base_dir": str(tmp_path),
        "no_subdir": True,
        "rse": OFFSITE_RSE_2,
    }
    download_client = DownloadClient()
    download_client.download_dids([download_spec])

    # Verify the downloaded file content
    download_path = tmp_path / lfn.lstrip("/")
    assert download_path.is_file(), f"Download failed at {download_path}"
    downloaded_content = download_path.read_text()
    assert downloaded_content == test_file_content, (
        "Downloaded file content does not match the original. "
        f"Expected: {test_file_content}, Got: {downloaded_content}"
    )
|
382
|
+
|
383
|
+
|
384
|
+
@pytest.mark.usefixtures("_auth_proxy")
@pytest.mark.verifies_usecase("UC-110-1.6")
def test_add_offsite_replication_rules_single_copy(
    pre_existing_lfn: str,
    test_scope: str,
    test_vo: str,
    storage_mount_path: Path,
    tmp_path: Path,
    test_file: tuple[Path, str],
    caplog,
):
    """Check add_offsite_replication_rules when only one offsite copy is requested (copies=1)."""
    client = IngestionClient(
        storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
    )
    caplog.set_level(logging.DEBUG)

    lfn = pre_existing_lfn
    did = {"scope": test_scope, "name": lfn}
    expected_content = test_file[1]

    # Ask for a single copy somewhere matching the OFFSITE expression
    rule_ids = client.add_offsite_replication_rules(
        lfn=lfn,
        offsite_rse_expression="OFFSITE",
        copies=1,
        lifetime=None,
    )

    # One copy must result in exactly one rule
    n_rules = len(rule_ids)
    assert n_rules == 1, f"Expected exactly 1 rule ID, got {len(rule_ids)}: {rule_ids}"

    # Block until the rule has been fulfilled
    rule_client = RuleClient()
    wait_for_replication_status(rule_client, rule_ids[0], expected_status="OK")

    # Look up on which RSEs the replica is available now
    replica_info = next(ReplicaClient().list_replicas(dids=[did]))
    states = replica_info.get("states", {})
    LOGGER.info(
        "Replica states for DID %s in test_add_offsite_replication_rules_single_copy: %s",
        did,
        states,
    )

    # Exactly one of the two offsite RSEs may hold the replica
    offsite_replica_count = 0
    for rse in (OFFSITE_RSE_1, OFFSITE_RSE_2):
        if states.get(rse) == "AVAILABLE":
            offsite_replica_count += 1
    assert (
        offsite_replica_count == 1
    ), f"Expected exactly 1 offsite replica (on either {OFFSITE_RSE_1} or {OFFSITE_RSE_2}), got {offsite_replica_count}: {states}"

    # Pick the offsite RSE that received the replica
    if states.get(OFFSITE_RSE_1) == "AVAILABLE":
        target_offsite_rse = OFFSITE_RSE_1
    else:
        target_offsite_rse = OFFSITE_RSE_2

    # Download from that RSE and compare with what was written originally
    DownloadClient().download_dids(
        [
            {
                "did": f"{test_scope}:{lfn}",
                "base_dir": str(tmp_path),
                "no_subdir": True,
                "rse": target_offsite_rse,
            }
        ]
    )

    download_path = tmp_path / lfn.lstrip("/")
    assert download_path.is_file(), f"Download failed at {download_path}"

    downloaded_content = download_path.read_text()
    mismatch_msg = (
        "Downloaded file content does not match the original. "
        f"Expected: {expected_content}, Got: {downloaded_content}"
    )
    assert downloaded_content == expected_content, mismatch_msg
|
466
|
+
|
467
|
+
|
468
|
+
def test_verify_fits_file(tel_events_test_file):
    """verify_fits_checksum must not raise for the telescope events test file."""
    from bdms.acada_ingestion import verify_fits_checksum as check

    with fits.open(tel_events_test_file) as hdu_list:
        check(hdu_list)
|
473
|
+
|
474
|
+
|
475
|
+
@pytest.fixture
def broken_checksum(tmp_path):
    """Write a FITS file with checksums, then corrupt its table payload and return the path."""
    path = tmp_path / "invalid.fits"

    columns = {"foo": [1, 2, 3], "bar": [4.0, 5.0, 6.0]}
    hdus = [fits.PrimaryHDU(), fits.BinTableHDU(Table(columns))]
    fits.HDUList(hdus).writeto(path, checksum=True)

    # FITS files are stored in blocks of 2880 bytes: the first block should
    # be the primary header, the second the header of the bintable and the
    # third the payload of the bintable. Garbage is written somewhere into
    # that payload so the stored checksum no longer matches.
    fits_block_size = 2880
    corrupt_offset = 2 * fits_block_size + 10
    with path.open("rb+") as f:
        f.seek(corrupt_offset)
        f.write(b"\x12\x34\xff")

    return path
|
494
|
+
|
495
|
+
|
496
|
+
def test_verify_fits_file_invalid_checksum(broken_checksum):
    """verify_fits_checksum must raise FITSVerificationError for the corrupted file."""
    from bdms.acada_ingestion import FITSVerificationError, verify_fits_checksum

    expected_error = pytest.raises(
        FITSVerificationError, match="CHECKSUM verification failed"
    )
    with fits.open(broken_checksum) as hdul, expected_error:
        verify_fits_checksum(hdul)
|
@@ -1,11 +1,11 @@
|
|
1
|
-
import time
|
2
|
-
|
3
1
|
import pytest
|
4
2
|
from rucio.client import Client
|
5
3
|
from rucio.client.client import ReplicaClient, RuleClient
|
6
4
|
from rucio.client.didclient import DIDClient
|
7
5
|
from rucio.client.uploadclient import UploadClient
|
8
6
|
|
7
|
+
from bdms.tests.utils import wait_for_replication_status
|
8
|
+
|
9
9
|
|
10
10
|
def test_server_version():
|
11
11
|
"""Test the expected version of rucio is running"""
|
@@ -67,40 +67,22 @@ def test_upload_file(test_vo, test_scope, tmp_path):
|
|
67
67
|
assert upload_client.upload([upload_spec]) == 0
|
68
68
|
|
69
69
|
|
70
|
+
"""
|
70
71
|
@pytest.mark.parametrize(
|
71
|
-
"timeout",
|
72
|
+
"timeout,poll_interval",
|
72
73
|
[
|
73
74
|
pytest.param(
|
74
75
|
60,
|
76
|
+
5,
|
75
77
|
marks=pytest.mark.xfail(
|
76
78
|
reason="sometimes there is an extra 300s timeout somewhere in FTS"
|
77
79
|
),
|
80
|
+
id="timeout-60",
|
78
81
|
),
|
79
|
-
(600,),
|
82
|
+
(600, 5),
|
80
83
|
],
|
81
84
|
)
|
82
|
-
|
83
|
-
rule_client = RuleClient()
|
84
|
-
|
85
|
-
start = time.perf_counter()
|
86
|
-
|
87
|
-
current_status = None
|
88
|
-
result = None
|
89
|
-
|
90
|
-
while (time.perf_counter() - start) < timeout:
|
91
|
-
result = rule_client.get_replication_rule(rule)
|
92
|
-
current_status = result["state"]
|
93
|
-
|
94
|
-
if current_status == status:
|
95
|
-
return
|
96
|
-
|
97
|
-
time.sleep(poll)
|
98
|
-
|
99
|
-
msg = (
|
100
|
-
f"Rule {rule} did not reach status '{status}' within {timeout} seconds."
|
101
|
-
f" Current status is '{current_status}'.\nFull output: {result}"
|
102
|
-
)
|
103
|
-
raise TimeoutError(msg)
|
85
|
+
"""
|
104
86
|
|
105
87
|
|
106
88
|
@pytest.mark.usefixtures("_auth_proxy")
|
@@ -138,6 +120,12 @@ def test_replication(test_vo, test_scope, tmp_path):
|
|
138
120
|
dids=dids, copies=1, rse_expression=replica_rse
|
139
121
|
)[0]
|
140
122
|
|
141
|
-
wait_for_replication_status(
|
123
|
+
wait_for_replication_status(
|
124
|
+
rule_client,
|
125
|
+
rule_id=rule,
|
126
|
+
expected_status="OK",
|
127
|
+
timeout=600,
|
128
|
+
poll_interval=5,
|
129
|
+
)
|
142
130
|
replicas = next(replica_client.list_replicas(dids))
|
143
131
|
assert replicas["states"] == {"STORAGE-1": "AVAILABLE", "STORAGE-2": "AVAILABLE"}
|
@@ -0,0 +1,119 @@
|
|
1
|
+
import os
|
2
|
+
import subprocess as sp
|
3
|
+
from datetime import datetime
|
4
|
+
from pathlib import Path
|
5
|
+
from secrets import token_hex
|
6
|
+
|
7
|
+
import pytest
|
8
|
+
from rucio.client.rseclient import RSEClient
|
9
|
+
|
10
|
+
from .conftest import STORAGE_HOSTNAME, STORAGE_PROTOCOL
|
11
|
+
|
12
|
+
# Expected ONSITE / OFFSITE attributes per RSE, keyed by RSE name.
# None means the attribute is not expected to be set to True on that RSE.
RSE_CONFIG = {
    "STORAGE-1": {"ONSITE": True, "OFFSITE": None},
    "STORAGE-2": {"ONSITE": None, "OFFSITE": True},
    "STORAGE-3": {"ONSITE": None, "OFFSITE": True},
}
|
18
|
+
|
19
|
+
|
20
|
+
def test_shared_storage(storage_mount_path: Path) -> None:
    """Ensure the shared storage directory exists on the client.

    Parameters
    ----------
    storage_mount_path : Path
        Mount point of the shared storage, provided as a fixture.
    """
    assert (
        storage_mount_path.exists()
    ), f"Shared storage {storage_mount_path} is not available on the client"
|
25
|
+
|
26
|
+
|
27
|
+
@pytest.fixture(scope="session")
def test_file(storage_mount_path, test_scope) -> tuple[str, str]:
    """Create a test file in the shared storage.

    Returns
    -------
    tuple[str, str]
        The name of the file relative to the storage root, with a leading
        ``/`` (``/ctao.dpps.test/<scope>/testfile_<id>.txt``), and the text
        written to the file. Note that the first element is a string, not
        the ``Path`` of the file on the mounted storage.
    """
    unique_id = f"{datetime.now():%Y%m%d_%H%M%S}_{token_hex(8)}"
    test_file_name = f"/ctao.dpps.test/{test_scope}/testfile_{unique_id}.txt"
    # strip the leading "/" so the name is appended to the mount path
    # instead of replacing it
    test_file_path = storage_mount_path / test_file_name.lstrip("/")
    test_file_content = f"This is a test file {unique_id}"
    test_file_path.parent.mkdir(parents=True, exist_ok=True)
    test_file_path.write_text(test_file_content)
    assert test_file_path.exists(), f"Test file {test_file_path} was not created successfully at {storage_mount_path}"

    return test_file_name, test_file_content
|
39
|
+
|
40
|
+
|
41
|
+
def test_file_access_from_onsite_storage_using_gfal(test_file: tuple[str, str]):
    """Verify that the file is accessible from the onsite storage pod using gfal-ls.

    Parameters
    ----------
    test_file : tuple[str, str]
        File name relative to the storage root (with leading ``/``) and the
        file content, as returned by the ``test_file`` fixture.
    """
    test_file_lfn, _ = test_file
    test_file_name = os.path.basename(test_file_lfn)

    gfal_url = f"{STORAGE_PROTOCOL}://{STORAGE_HOSTNAME}/rucio{test_file_lfn}"
    cmd = ["gfal-ls", gfal_url]
    try:
        output = sp.run(cmd, capture_output=True, text=True, check=True)
    except sp.CalledProcessError as e:
        pytest.fail(
            f"gfal-ls failed for {gfal_url}:\nSTDERR: {e.stderr.strip()}\nSTDOUT: {e.stdout.strip()}"
        )

    stdout = output.stdout.strip()
    # printed output only shows up in the pytest report (on failure or with -s)
    print(f"GFAL Output: {stdout}")

    assert any(
        test_file_name in line for line in stdout.splitlines()
    ), f"File {test_file_name} not accessible; gfal-ls output: {stdout!r}"
|
62
|
+
|
63
|
+
|
64
|
+
@pytest.mark.usefixtures("_auth_proxy")
def test_rse_attributes():
    """Verify onsite and offsite RSE attributes set by setup_rucio.sh during the bootstrap job deployment

    Ensures:
    - STORAGE-1 has onsite=True and no offsite=True
    - STORAGE-2 and STORAGE-3 have offsite=True and no onsite=True

    Raises:
        pytest.fail: If RSE details cannot be retrieved (in case of RSEs not found or Rucio server connectivity issues)
        AssertionError: If attribute values don't match the expected ones
    """

    rse_client = RSEClient()

    for rse_name, expected_attrs in RSE_CONFIG.items():
        # Only the retrieval is guarded: AssertionError is an Exception too,
        # so asserting inside this try would report attribute mismatches as
        # connectivity problems.
        try:
            # Verify RSE exists
            rse_details = rse_client.get_rse(rse_name)
            # Fetch attributes
            attrs = rse_client.list_rse_attributes(rse_name)
        except Exception as e:
            pytest.fail(
                f"Failed to retrieve RSE details for {rse_name}: {e}. "
                "Check Rucio server connectivity or RSE existence"
            )

        print(f"{rse_name} metadata: {rse_details}")
        print(f"{rse_name} attributes: {attrs}")

        # Verify RSE onsite attribute
        onsite_value = attrs.get("ONSITE")
        expected_onsite = expected_attrs["ONSITE"]
        assert onsite_value == expected_onsite, (
            f"{rse_name} onsite attribute mismatch: "
            f"expected {expected_onsite!r}, got {onsite_value!r}. "
            f"Full attributes: {attrs}"
        )

        # Verify RSE offsite attribute
        offsite_value = attrs.get("OFFSITE")
        expected_offsite = expected_attrs["OFFSITE"]
        if expected_offsite is None:
            # None in RSE_CONFIG only forbids offsite=True
            assert offsite_value is not True, (
                f"{rse_name} should not have offsite=True, "
                f"got {offsite_value!r}. Full attributes: {attrs}"
            )
        else:
            assert offsite_value == expected_offsite, (
                f"{rse_name} offsite attribute mismatch: "
                f"expected {expected_offsite!r}, got {offsite_value!r}. "
                f"Full attributes: {attrs}"
            )

        print(f"{rse_name} passed attribute tests")
|