ctao-bdms-clients 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bdms/_version.py +2 -2
- bdms/acada_ingestion.py +479 -0
- bdms/extract_fits_metadata.py +134 -0
- bdms/tests/conftest.py +64 -0
- bdms/tests/test_acada_ingestion.py +526 -0
- bdms/tests/test_basic_rucio_functionality.py +14 -27
- bdms/tests/test_extract_fits_metadata.py +97 -0
- bdms/tests/test_onsite_storage.py +100 -0
- bdms/tests/utils.py +130 -0
- {ctao_bdms_clients-0.1.0.dist-info → ctao_bdms_clients-0.2.0.dist-info}/METADATA +12 -5
- ctao_bdms_clients-0.2.0.dist-info/RECORD +20 -0
- {ctao_bdms_clients-0.1.0.dist-info → ctao_bdms_clients-0.2.0.dist-info}/WHEEL +1 -1
- ctao_bdms_clients-0.1.0.dist-info/RECORD +0 -14
- {ctao_bdms_clients-0.1.0.dist-info → ctao_bdms_clients-0.2.0.dist-info/licenses}/LICENSE +0 -0
- {ctao_bdms_clients-0.1.0.dist-info → ctao_bdms_clients-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,526 @@
|
|
1
|
+
"""Tests for onsite ingestion and replication into the BDMS system using the IngestionClient.
|
2
|
+
|
3
|
+
This module contains tests for the IngestionClient class, focusing on the conversion of ACADA paths to Logical File Names (LFNs), the registration of replicas in Rucio,
|
4
|
+
and the replication of data between Rucio storage elements (RSEs).
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
import subprocess
|
9
|
+
from pathlib import Path
|
10
|
+
from shutil import copy2
|
11
|
+
|
12
|
+
import pytest
|
13
|
+
from astropy.io import fits
|
14
|
+
from astropy.table import Table
|
15
|
+
from rucio.client import Client
|
16
|
+
from rucio.client.downloadclient import DownloadClient
|
17
|
+
from rucio.client.replicaclient import ReplicaClient
|
18
|
+
from rucio.client.ruleclient import RuleClient
|
19
|
+
from rucio.common.exception import RucioException
|
20
|
+
from rucio.common.utils import adler32
|
21
|
+
|
22
|
+
from bdms.acada_ingestion import IngestionClient
|
23
|
+
from bdms.tests.utils import reset_xrootd_permissions, wait_for_replication_status
|
24
|
+
|
25
|
+
LOGGER = logging.getLogger(__name__)
|
26
|
+
|
27
|
+
ONSITE_RSE = "STORAGE-1"
|
28
|
+
OFFSITE_RSE_1 = "STORAGE-2"
|
29
|
+
OFFSITE_RSE_2 = "STORAGE-3"
|
30
|
+
|
31
|
+
|
32
|
+
def test_shared_storage(storage_mount_path: Path):
    """Check that the shared storage mount point exists on the client."""
    failure_msg = f"Shared storage {storage_mount_path} is not available on the client"
    assert storage_mount_path.exists(), failure_msg
|
38
|
+
|
39
|
+
|
40
|
+
def trigger_judge_repairer() -> None:
    """Trigger the rucio-judge-repairer daemon to run once and fix any STUCK rules.

    The daemon is started with ``kubectl exec`` in ``deployment/bdms-judge-evaluator``.
    A ``kubectl`` binary in the current working directory is preferred; if there is
    none, the ``kubectl`` found on ``PATH`` is used instead.

    Raises
    ------
    RuntimeError
        If no kubectl executable could be started.
    subprocess.CalledProcessError
        If the kubectl command exits with a non-zero status.
    """
    import shutil

    # The error message below promises a lookup in the working directory *and*
    # on PATH, so honour both: local binary first, PATH as fallback.  If neither
    # exists we still try "./kubectl" so the FileNotFoundError path is unchanged.
    kubectl = "./kubectl"
    if not Path(kubectl).is_file():
        kubectl = shutil.which("kubectl") or kubectl

    cmd = [
        kubectl,
        "exec",
        "deployment/bdms-judge-evaluator",
        "--",
        "/usr/local/bin/rucio-judge-repairer",
        "--run-once",
    ]

    try:
        result = subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
        )
    except FileNotFoundError as e:
        LOGGER.error("kubectl command not found: %s", str(e))
        raise RuntimeError(
            "kubectl command not found. Ensure kubectl is in the PATH or working directory."
        ) from e
    except subprocess.CalledProcessError as e:
        # stderr of the failed command is the most useful hint for debugging
        LOGGER.error("Failed to trigger rucio-judge-repairer daemon: %s", e.stderr)
        raise
    else:
        LOGGER.info("Triggered rucio-judge-repairer daemon: %s", result.stdout)
|
67
|
+
|
68
|
+
|
69
|
+
def test_acada_to_lfn(storage_mount_path: Path, test_vo: str):
    """Exercise IngestionClient.acada_to_lfn with one valid path and several invalid ones."""
    client = IngestionClient(storage_mount_path, ONSITE_RSE, vo=test_vo)

    # Valid input: a file below <data_path>/<vo>/<scope>/ maps to the LFN
    # obtained by dropping the data_path prefix.
    expected_lfn = (
        f"/{client.vo}/{client.scope}/DL0/LSTN-01/events/2023/10/13/"
        "Subarray_SWAT_sbid008_obid00081_0.fits.fz"
    )
    valid_path = f"{client.data_path}{expected_lfn}"

    lfn = client.acada_to_lfn(acada_path=valid_path)
    assert lfn == expected_lfn, f"Expected {expected_lfn}, got {lfn}"

    # Invalid inputs, each paired with the error message fragment it must trigger.
    invalid_cases = [
        # empty string is not an absolute path
        ("", "acada_path must be absolute"),
        # relative path
        ("./test.fits", "acada_path must be absolute"),
        # absolute, but outside of data_path
        ("/invalid/path/file.fits.fz", "is not within data_path"),
        # inside data_path, but wrong <vo>/<scope> prefix
        (
            f"{client.data_path}/wrong_vo/wrong_scope/DL0/LSTN-01/file.fits.fz",
            "must start with",
        ),
        # correct <vo>, wrong <scope>
        (
            f"{client.data_path}/{client.vo}/wrong_scope/DL0/LSTN-01/file.fits.fz",
            "must start with",
        ),
    ]
    for bad_path, message in invalid_cases:
        with pytest.raises(ValueError, match=message):
            client.acada_to_lfn(acada_path=bad_path)
|
112
|
+
|
113
|
+
|
114
|
+
@pytest.mark.usefixtures("_auth_proxy")
def test_check_replica_exists(
    storage_mount_path: Path,
    test_scope: str,
    onsite_test_file: tuple[Path, str],
    test_vo: str,
):
    """Check IngestionClient.check_replica_exists before and after registering a replica."""
    client = IngestionClient(
        storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
    )

    source_path, _ = onsite_test_file
    lfn = client.acada_to_lfn(source_path)

    # Before registration nothing must be found for the LFN.
    replica_found = client.check_replica_exists(lfn)
    assert not replica_found, f"Expected no replica for LFN {lfn} before registration"

    # Register the file, after which the replica has to be reported.
    client.add_onsite_replica(source_path)
    replica_found = client.check_replica_exists(lfn)
    assert replica_found, f"Expected replica to exist for LFN {lfn} after registration"

    # An LFN that was never registered must still be reported as missing.
    nonexistent_lfn = lfn + ".nonexistent"
    replica_found = client.check_replica_exists(nonexistent_lfn)
    assert (
        not replica_found
    ), f"Expected no replica for nonexistent LFN {nonexistent_lfn}"
|
147
|
+
|
148
|
+
|
149
|
+
@pytest.fixture
def file_location(request):
    """Return the value of the fixture whose name is given as the parameter.

    Used with ``indirect`` parametrization so a test can be parametrized
    over other fixtures by name.
    """
    fixture_name = request.param
    return request.getfixturevalue(fixture_name)
|
152
|
+
|
153
|
+
|
154
|
+
@pytest.mark.parametrize(
    ("file_location", "metadata_dict"),
    [
        (
            "subarray_test_file",
            {
                "observatory": "CTA",
                "start_time": "2025-02-04T21:34:05",
                "end_time": "2025-02-04T21:43:12",
                "subarray_id": 0,
                "sb_id": 2000000066,
                "obs_id": 2000000200,
            },
        ),
        (
            "tel_trigger_test_file",
            {
                "observatory": "CTA",
                "start_time": "2025-02-04T21:34:05",
                "end_time": "2025-02-04T21:43:11",
                "tel_ids": [1],
                "sb_id": 2000000066,
                "obs_id": 2000000200,
            },
        ),
        (
            "tel_events_test_file",
            {
                "observatory": "CTA",
                "start_time": "2025-04-01T15:25:02",
                "end_time": "2025-04-01T15:25:03",
                "sb_id": 0,
                "obs_id": 0,
            },
        ),
    ],
    indirect=["file_location"],
)
@pytest.mark.usefixtures("_auth_proxy")
@pytest.mark.verifies_usecase("UC-110-1.1.1")
def test_add_onsite_replica_with_minio_fits_file(
    file_location: str,
    metadata_dict: dict,
    test_scope: str,
    tmp_path: Path,
    storage_mount_path,
    test_vo: str,
    caplog,
):
    """Test the add_onsite_replica method of IngestionClient using the FITS test files.

    For each parametrized FITS file the test

    1. copies the file below ``<storage_mount_path>/<vo>/<scope>/``,
    2. registers it with ``add_onsite_replica`` and checks the returned LFN,
    3. downloads it again and compares the adler32 checksums,
    4. registers it a second time and checks for the "already exists" log message,
    5. compares the metadata stored in Rucio with ``metadata_dict``.
    """
    # Path.name is the robust way to get the last path component
    filename = Path(file_location).name
    acada_path = storage_mount_path / test_vo / test_scope / filename
    acada_path.parent.mkdir(parents=True, exist_ok=True)
    copy2(file_location, str(acada_path))
    reset_xrootd_permissions(storage_mount_path)

    ingestion_client = IngestionClient(
        storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
    )

    # Use add_onsite_replica to register the replica
    lfn = ingestion_client.add_onsite_replica(acada_path=acada_path)

    # Verify the LFN matches the expected LFN
    expected_lfn = ingestion_client.acada_to_lfn(acada_path)
    assert lfn == expected_lfn, f"Expected LFN {expected_lfn}, got {lfn}"

    # Download the file using the LFN
    download_spec = {
        "did": f"{ingestion_client.scope}:{lfn}",
        "base_dir": str(tmp_path),
        "no_subdir": True,
    }
    download_client = DownloadClient()
    download_client.download_dids([download_spec])

    # Verify the downloaded file
    download_path = tmp_path / lfn.lstrip("/")
    assert download_path.is_file(), f"Download failed at {download_path}"

    assert adler32(download_path) == adler32(
        file_location
    ), "Downloaded file content does not match the original. "

    # Registering the same file again must only log that it is skipped
    caplog.clear()
    lfn = ingestion_client.add_onsite_replica(acada_path=acada_path)
    assert f"Replica already exists for lfn '{lfn}', skipping" in [
        r.message for r in caplog.records
    ]

    # Retrieve the metadata stored for the DID
    did_client = Client()
    retrieved_metadata = did_client.get_metadata(
        scope=ingestion_client.scope, name=lfn, plugin="JSON"
    )

    # Verify the metadata matches the expected metadata
    for key, value in metadata_dict.items():
        assert retrieved_metadata.get(key) == value, (
            f"Metadata mismatch for key '{key}'. "
            f"Expected: {value}, Got: {retrieved_metadata.get(key)}"
        )
|
258
|
+
|
259
|
+
|
260
|
+
def test_rses():
    """Check that the onsite RSE and both offsite RSEs are known to Rucio."""
    rucio_client = Client()
    rses = [entry["rse"] for entry in list(rucio_client.list_rses())]

    # Same check for each expected RSE, in the order onsite, offsite 1, offsite 2.
    for expected_rse in (ONSITE_RSE, OFFSITE_RSE_1, OFFSITE_RSE_2):
        assert expected_rse in rses, f"Expected RSE {expected_rse} not found in {rses}"
|
269
|
+
|
270
|
+
|
271
|
+
@pytest.fixture
def pre_existing_lfn(
    onsite_test_file: tuple[Path, str],
    test_scope: str,
    test_vo: str,
) -> str:
    """Register the onsite test file in Rucio via ReplicaClient and return its LFN.

    The replica is added directly with ``ReplicaClient.add_replicas`` so that
    tests depending on this fixture do not rely on IngestionClient for the
    registration step.
    """
    source_file, _ = onsite_test_file

    # LFN is "/<vo>/<scope>/" followed by whatever comes after "<vo>/<scope>/"
    # in the path of the test file.
    vo_scope_prefix = f"{test_vo}/{test_scope}/"
    path_tail = str(source_file).split(vo_scope_prefix, 1)[-1]
    lfn = f"/{vo_scope_prefix}{path_tail}"

    did = {"scope": test_scope, "name": lfn}
    file_entry = {
        **did,
        "bytes": source_file.stat().st_size,
        "adler32": adler32(source_file),
    }

    client = ReplicaClient()
    try:
        client.add_replicas(rse=ONSITE_RSE, files=[file_entry])
    except RucioException as err:
        LOGGER.error(
            "Failed to pre-register replica for LFN %s on %s: %s",
            lfn,
            ONSITE_RSE,
            str(err),
        )
        raise

    # Make sure Rucio now lists at least one entry for the DID.
    registered = list(client.list_replicas(dids=[did]))
    assert (
        registered
    ), f"Failed to verify pre-registration of replica for LFN {lfn} on {ONSITE_RSE}"

    return lfn
|
314
|
+
|
315
|
+
|
316
|
+
@pytest.mark.usefixtures("_auth_proxy")
@pytest.mark.verifies_usecase("UC-110-1.6")
def test_add_offsite_replication_rules(
    pre_existing_lfn: str,
    test_scope: str,
    test_vo: str,
    storage_mount_path: Path,
    tmp_path: Path,
    onsite_test_file: tuple[Path, str],
    caplog,
):
    """Test the add_offsite_replication_rules method of IngestionClient with two copies.

    The test creates the offsite rules for a pre-registered LFN, waits for both
    rules to reach state ``OK``, checks that the replica is AVAILABLE on the
    onsite RSE and on both offsite RSEs, and finally downloads the file from
    OFFSITE_RSE_2 to compare its content with the original.
    """
    ingestion_client = IngestionClient(
        storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
    )
    caplog.set_level(logging.DEBUG)

    # Replicate the ACADA file to two offsite RSEs
    lfn = pre_existing_lfn
    did = {"scope": test_scope, "name": lfn}

    _, test_file_content = onsite_test_file  # Get the test file content

    offsite_rse_expression = "OFFSITE"
    copies = 2
    rule_ids = ingestion_client.add_offsite_replication_rules(
        lfn=lfn,
        offsite_rse_expression=offsite_rse_expression,
        copies=copies,
        lifetime=None,
    )

    # Fail with a readable message instead of an IndexError below
    assert (
        len(rule_ids) >= 2
    ), f"Expected at least 2 rule IDs, got {len(rule_ids)}: {rule_ids}"
    rule_id_offsite_1 = rule_ids[0]
    rule_id_offsite_2 = rule_ids[1]
    rule_client = RuleClient()

    # Wait for the first offsite rule to complete
    wait_for_replication_status(rule_client, rule_id_offsite_1, expected_status="OK")

    # After the first rule the replica may be on either of the two offsite RSEs
    replica_client = ReplicaClient()
    replicas = next(replica_client.list_replicas(dids=[did]))
    states = replicas.get("states", {})
    assert (
        states.get(OFFSITE_RSE_1) == "AVAILABLE"
        or states.get(OFFSITE_RSE_2) == "AVAILABLE"
    ), f"Expected replica on either {OFFSITE_RSE_1} or {OFFSITE_RSE_2} to be AVAILABLE after first rule: {states}"

    # Manually trigger the judge-repairer to ensure the second rule doesn't get stuck
    trigger_judge_repairer()

    # Wait for the second offsite rule to complete
    wait_for_replication_status(rule_client, rule_id_offsite_2, expected_status="OK")

    # Verify the replica exists on all RSEs
    replicas = next(replica_client.list_replicas(dids=[did]))
    states = replicas.get("states", {})
    LOGGER.info(
        "Replica states for DID %s in test_add_offsite_replication_rules: %s",
        did,
        states,
    )
    assert (
        states.get(ONSITE_RSE) == "AVAILABLE"
    ), f"Expected replica on {ONSITE_RSE} to be AVAILABLE: {states}"
    assert (
        states.get(OFFSITE_RSE_1) == "AVAILABLE"
    ), f"Expected replica on {OFFSITE_RSE_1} to be AVAILABLE: {states}"
    assert (
        states.get(OFFSITE_RSE_2) == "AVAILABLE"
    ), f"Expected replica on {OFFSITE_RSE_2} to be AVAILABLE: {states}"

    # Download the file from OFFSITE_RSE_2 to verify its content
    download_spec = {
        "did": f"{test_scope}:{lfn}",
        "base_dir": str(tmp_path),
        "no_subdir": True,
        "rse": OFFSITE_RSE_2,
    }
    download_client = DownloadClient()
    download_client.download_dids([download_spec])

    # Verify the downloaded file content
    download_path = tmp_path / lfn.lstrip("/")
    assert download_path.is_file(), f"Download failed at {download_path}"
    downloaded_content = download_path.read_text()
    assert downloaded_content == test_file_content, (
        f"Downloaded file content does not match the original. "
        f"Expected: {test_file_content}, Got: {downloaded_content}"
    )
|
407
|
+
|
408
|
+
|
409
|
+
@pytest.mark.usefixtures("_auth_proxy")
@pytest.mark.verifies_usecase("UC-110-1.6")
def test_add_offsite_replication_rules_single_copy(
    pre_existing_lfn: str,
    test_scope: str,
    test_vo: str,
    storage_mount_path: Path,
    tmp_path: Path,
    onsite_test_file: tuple[Path, str],
    caplog,
):
    """Check add_offsite_replication_rules of IngestionClient when only one copy is requested.

    Exactly one rule must be created, exactly one of the two offsite RSEs must
    end up with an AVAILABLE replica, and the file downloaded from that RSE
    must have the content of the original test file.
    """
    client = IngestionClient(
        storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
    )
    caplog.set_level(logging.DEBUG)

    lfn = pre_existing_lfn
    did = {"scope": test_scope, "name": lfn}
    expected_content = onsite_test_file[1]

    # Request a single offsite copy of the pre-registered file.
    rule_ids = client.add_offsite_replication_rules(
        lfn=lfn,
        offsite_rse_expression="OFFSITE",
        copies=1,
        lifetime=None,
    )

    n_rules = len(rule_ids)
    assert n_rules == 1, f"Expected exactly 1 rule ID, got {len(rule_ids)}: {rule_ids}"

    # Block until the only rule is reported as OK.
    wait_for_replication_status(RuleClient(), rule_ids[0], expected_status="OK")

    # Look up the per-RSE replica states of the DID.
    replica_info = next(ReplicaClient().list_replicas(dids=[did]))
    states = replica_info.get("states", {})
    LOGGER.info(
        "Replica states for DID %s in test_add_offsite_replication_rules_single_copy: %s",
        did,
        states,
    )

    # Exactly one of the two offsite RSEs must hold an AVAILABLE replica.
    available_offsite = [
        rse for rse in (OFFSITE_RSE_1, OFFSITE_RSE_2) if states.get(rse) == "AVAILABLE"
    ]
    offsite_replica_count = len(available_offsite)
    assert (
        offsite_replica_count == 1
    ), f"Expected exactly 1 offsite replica (on either {OFFSITE_RSE_1} or {OFFSITE_RSE_2}), got {offsite_replica_count}: {states}"

    # With exactly one entry, that entry is the RSE to download from.
    target_offsite_rse = available_offsite[0]

    # Fetch the file from that RSE into tmp_path.
    DownloadClient().download_dids(
        [
            {
                "did": f"{test_scope}:{lfn}",
                "base_dir": str(tmp_path),
                "no_subdir": True,
                "rse": target_offsite_rse,
            }
        ]
    )

    # Compare what was downloaded with the original content.
    download_path = tmp_path / lfn.lstrip("/")
    assert download_path.is_file(), f"Download failed at {download_path}"
    downloaded_content = download_path.read_text()
    assert downloaded_content == expected_content, (
        f"Downloaded file content does not match the original. "
        f"Expected: {expected_content}, Got: {downloaded_content}"
    )
|
491
|
+
|
492
|
+
|
493
|
+
def test_verify_fits_file(tel_events_test_file):
    """Run verify_fits_checksum on the telescope events test file; it must not raise."""
    from bdms.acada_ingestion import verify_fits_checksum

    with fits.open(tel_events_test_file) as hdu_list:
        verify_fits_checksum(hdu_list)
|
498
|
+
|
499
|
+
|
500
|
+
@pytest.fixture
def broken_checksum(tmp_path):
    """Write a small FITS file with checksums, corrupt its table data and return the path."""
    fits_path = tmp_path / "invalid.fits"

    # A primary HDU plus one binary table with two columns, written with checksums.
    columns = {"foo": [1, 2, 3], "bar": [4.0, 5.0, 6.0]}
    table = Table(columns)
    hdus = [fits.PrimaryHDU(), fits.BinTableHDU(table)]
    fits.HDUList(hdus).writeto(fits_path, checksum=True)

    # FITS files are organised in blocks of 2880 bytes.  The first block should
    # be the primary header, the second the header of the binary table and the
    # third the table payload, so an offset a few bytes into the third block
    # lands inside the table data.
    block_size = 2880
    payload_offset = 2 * block_size + 10

    # Overwrite three bytes there with garbage so the stored checksum no longer matches.
    with fits_path.open("rb+") as stream:
        stream.seek(payload_offset)
        stream.write(b"\x12\x34\xff")

    return fits_path
|
519
|
+
|
520
|
+
|
521
|
+
def test_verify_fits_file_invalid_checksum(broken_checksum):
    """verify_fits_checksum must raise FITSVerificationError for the corrupted file."""
    from bdms.acada_ingestion import FITSVerificationError, verify_fits_checksum

    expected_error = pytest.raises(
        FITSVerificationError, match="CHECKSUM verification failed"
    )
    # The file is opened first; the error is expected only from the verification call.
    with fits.open(broken_checksum) as hdu_list, expected_error:
        verify_fits_checksum(hdu_list)
|
@@ -1,11 +1,11 @@
|
|
1
|
-
import time
|
2
|
-
|
3
1
|
import pytest
|
4
2
|
from rucio.client import Client
|
5
3
|
from rucio.client.client import ReplicaClient, RuleClient
|
6
4
|
from rucio.client.didclient import DIDClient
|
7
5
|
from rucio.client.uploadclient import UploadClient
|
8
6
|
|
7
|
+
from bdms.tests.utils import wait_for_replication_status
|
8
|
+
|
9
9
|
|
10
10
|
def test_server_version():
|
11
11
|
"""Test the expected version of rucio is running"""
|
@@ -67,40 +67,22 @@ def test_upload_file(test_vo, test_scope, tmp_path):
|
|
67
67
|
assert upload_client.upload([upload_spec]) == 0
|
68
68
|
|
69
69
|
|
70
|
+
"""
|
70
71
|
@pytest.mark.parametrize(
|
71
|
-
"timeout",
|
72
|
+
"timeout,poll_interval",
|
72
73
|
[
|
73
74
|
pytest.param(
|
74
75
|
60,
|
76
|
+
5,
|
75
77
|
marks=pytest.mark.xfail(
|
76
78
|
reason="sometimes there is an extra 300s timeout somewhere in FTS"
|
77
79
|
),
|
80
|
+
id="timeout-60",
|
78
81
|
),
|
79
|
-
(600,),
|
82
|
+
(600, 5),
|
80
83
|
],
|
81
84
|
)
|
82
|
-
|
83
|
-
rule_client = RuleClient()
|
84
|
-
|
85
|
-
start = time.perf_counter()
|
86
|
-
|
87
|
-
current_status = None
|
88
|
-
result = None
|
89
|
-
|
90
|
-
while (time.perf_counter() - start) < timeout:
|
91
|
-
result = rule_client.get_replication_rule(rule)
|
92
|
-
current_status = result["state"]
|
93
|
-
|
94
|
-
if current_status == status:
|
95
|
-
return
|
96
|
-
|
97
|
-
time.sleep(poll)
|
98
|
-
|
99
|
-
msg = (
|
100
|
-
f"Rule {rule} did not reach status '{status}' within {timeout} seconds."
|
101
|
-
f" Current status is '{current_status}'.\nFull output: {result}"
|
102
|
-
)
|
103
|
-
raise TimeoutError(msg)
|
85
|
+
"""
|
104
86
|
|
105
87
|
|
106
88
|
@pytest.mark.usefixtures("_auth_proxy")
|
@@ -138,6 +120,11 @@ def test_replication(test_vo, test_scope, tmp_path):
|
|
138
120
|
dids=dids, copies=1, rse_expression=replica_rse
|
139
121
|
)[0]
|
140
122
|
|
141
|
-
wait_for_replication_status(
|
123
|
+
wait_for_replication_status(
|
124
|
+
rule_client,
|
125
|
+
rule_id=rule,
|
126
|
+
expected_status="OK",
|
127
|
+
poll_interval=5,
|
128
|
+
)
|
142
129
|
replicas = next(replica_client.list_replicas(dids))
|
143
130
|
assert replicas["states"] == {"STORAGE-1": "AVAILABLE", "STORAGE-2": "AVAILABLE"}
|
@@ -0,0 +1,97 @@
|
|
1
|
+
from astropy.io import fits
|
2
|
+
|
3
|
+
from bdms.extract_fits_metadata import (
|
4
|
+
extract_metadata_from_data,
|
5
|
+
extract_metadata_from_headers,
|
6
|
+
)
|
7
|
+
|
8
|
+
|
9
|
+
def test_extraction_correct_value_subarray_file(subarray_test_file):
    """Test the extraction of metadata from the SUBARRAY FITS test file.

    Metadata from the headers and from the data are merged (data values win on
    duplicate keys) and compared with the expected values.
    """
    with fits.open(subarray_test_file) as hdul:
        metadata_header = extract_metadata_from_headers(hdul)

    metadata_payload = extract_metadata_from_data(subarray_test_file)
    metadata_fits = {**metadata_header, **metadata_payload}

    assert len(metadata_fits) > 0, "No metadata found in the SUBARRAY FITS"

    expected_metadata = {
        "observatory": "CTA",
        "start_time": "2025-02-04T21:34:05",
        "end_time": "2025-02-04T21:43:12",
        "subarray_id": 0,
        "sb_id": 2000000066,
        "obs_id": 2000000200,
    }

    for key, value in expected_metadata.items():
        # Report a missing key and a wrong value separately so the failure
        # message says what actually went wrong.
        assert key in metadata_fits, f"Expected key '{key}' not found."
        assert metadata_fits[key] == value, (
            f"Metadata mismatch for key '{key}'. "
            f"Expected: {value}, Got: {metadata_fits[key]}"
        )
|
30
|
+
|
31
|
+
|
32
|
+
def test_extraction_correct_value_tel_trigger_file(tel_trigger_test_file):
    """Test the extraction of metadata from the telescope TRIGGER FITS test file.

    Metadata from the headers and from the data are merged (data values win on
    duplicate keys) and compared with the expected values.
    """
    with fits.open(tel_trigger_test_file) as hdul:
        metadata_header = extract_metadata_from_headers(hdul)

    metadata_payload = extract_metadata_from_data(tel_trigger_test_file)
    metadata_fits = {**metadata_header, **metadata_payload}

    assert len(metadata_fits) > 0, "No metadata found in the Telescope TRIGGER FITS"

    expected_metadata = {
        "observatory": "CTA",
        "start_time": "2025-02-04T21:34:05",
        "end_time": "2025-02-04T21:43:11",
        "tel_ids": [1],
        "sb_id": 2000000066,
        "obs_id": 2000000200,
    }

    for key, value in expected_metadata.items():
        # Report a missing key and a wrong value separately so the failure
        # message says what actually went wrong.
        assert key in metadata_fits, f"Expected key '{key}' not found."
        assert metadata_fits[key] == value, (
            f"Metadata mismatch for key '{key}'. "
            f"Expected: {value}, Got: {metadata_fits[key]}"
        )
|
53
|
+
|
54
|
+
|
55
|
+
def test_extraction_correct_value_tel_events_file(tel_events_test_file):
    """Test the extraction of metadata from the telescope EVENTS FITS test file.

    Metadata from the headers and from the data are merged (data values win on
    duplicate keys) and compared with the expected values.
    """
    with fits.open(tel_events_test_file) as hdul:
        metadata_header = extract_metadata_from_headers(hdul)

    metadata_payload = extract_metadata_from_data(tel_events_test_file)
    metadata_fits = {**metadata_header, **metadata_payload}

    assert len(metadata_fits) > 0, "No metadata found in the Telescope EVENTS FITS"

    expected_metadata = {
        "observatory": "CTA",
        "start_time": "2025-04-01T15:25:02",
        "end_time": "2025-04-01T15:25:03",
        "sb_id": 0,
        "obs_id": 0,
    }

    for key, value in expected_metadata.items():
        # Report a missing key and a wrong value separately so the failure
        # message says what actually went wrong.
        assert key in metadata_fits, f"Expected key '{key}' not found."
        assert metadata_fits[key] == value, (
            f"Metadata mismatch for key '{key}'. "
            f"Expected: {value}, Got: {metadata_fits[key]}"
        )
|
75
|
+
|
76
|
+
|
77
|
+
def test_extract_metadata_from_data_incorrect_header(tmp_path):
    """Header extraction on a FITS file holding only an empty primary HDU must yield {}."""
    empty_file = tmp_path / "empty_fits.fits.fz"
    primary_only = [fits.PrimaryHDU()]
    fits.HDUList(primary_only).writeto(empty_file, checksum=True)

    with fits.open(empty_file) as opened:
        header_metadata = extract_metadata_from_headers(opened)

    assert header_metadata == {}, "Expected empty metadata in the header"
|
87
|
+
|
88
|
+
|
89
|
+
def test_extract_metadata_from_data_incorrect_data(tmp_path):
    """Data extraction on a FITS file holding only an empty primary HDU must yield {}."""
    empty_file = tmp_path / "empty_fits.fits.fz"
    primary_only = [fits.PrimaryHDU()]
    fits.HDUList(primary_only).writeto(empty_file, checksum=True)

    payload_metadata = extract_metadata_from_data(empty_file)

    assert payload_metadata == {}, "Expected empty metadata in the payload"
|