ctao-bdms-clients 0.2.0rc1__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bdms/_version.py +2 -2
- bdms/acada_ingestion.py +48 -4
- bdms/extract_fits_metadata.py +134 -0
- bdms/tests/conftest.py +25 -2
- bdms/tests/test_acada_ingestion.py +81 -56
- bdms/tests/test_basic_rucio_functionality.py +0 -1
- bdms/tests/test_extract_fits_metadata.py +97 -0
- bdms/tests/test_onsite_storage.py +16 -35
- bdms/tests/utils.py +18 -0
- {ctao_bdms_clients-0.2.0rc1.dist-info → ctao_bdms_clients-0.2.1.dist-info}/METADATA +4 -2
- ctao_bdms_clients-0.2.1.dist-info/RECORD +20 -0
- {ctao_bdms_clients-0.2.0rc1.dist-info → ctao_bdms_clients-0.2.1.dist-info}/WHEEL +1 -1
- ctao_bdms_clients-0.2.0rc1.dist-info/RECORD +0 -18
- {ctao_bdms_clients-0.2.0rc1.dist-info → ctao_bdms_clients-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {ctao_bdms_clients-0.2.0rc1.dist-info → ctao_bdms_clients-0.2.1.dist-info}/top_level.txt +0 -0
bdms/_version.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
18
18
|
version_tuple: VERSION_TUPLE
|
19
19
|
|
20
|
-
__version__ = version = '0.2.0rc1'
|
21
|
-
__version_tuple__ = version_tuple = (0, 2, 0, 'rc1')
|
20
|
+
__version__ = version = '0.2.1'
|
21
|
+
__version_tuple__ = version_tuple = (0, 2, 1)
|
bdms/acada_ingestion.py
CHANGED
@@ -4,16 +4,16 @@ This module provides the IngestionClient class to manage the ingestion of ACADA
|
|
4
4
|
It includes functionality for constructing FITS file paths, converting ACADA paths to Logical File Names (LFNs),
|
5
5
|
and registering replicas in Rucio.
|
6
6
|
"""
|
7
|
-
# this is the latest working file (as on 6:40 pm) with judge repairer repairing the STUCK rule.
|
8
7
|
|
9
8
|
import logging
|
10
9
|
import os
|
10
|
+
from contextlib import ExitStack
|
11
11
|
from pathlib import Path
|
12
12
|
from typing import Optional, Union
|
13
13
|
|
14
14
|
from astropy.io import fits
|
15
15
|
from rucio.client.accountclient import AccountClient
|
16
|
-
from rucio.client.client import Client
|
16
|
+
from rucio.client.client import Client, DIDClient
|
17
17
|
from rucio.client.replicaclient import ReplicaClient
|
18
18
|
from rucio.client.rseclient import RSEClient
|
19
19
|
from rucio.client.ruleclient import RuleClient
|
@@ -21,6 +21,11 @@ from rucio.client.scopeclient import ScopeClient
|
|
21
21
|
from rucio.common.exception import Duplicate, RucioException
|
22
22
|
from rucio.common.utils import adler32
|
23
23
|
|
24
|
+
from bdms.extract_fits_metadata import (
|
25
|
+
extract_metadata_from_data,
|
26
|
+
extract_metadata_from_headers,
|
27
|
+
)
|
28
|
+
|
24
29
|
LOGGER = logging.getLogger(__name__)
|
25
30
|
|
26
31
|
|
@@ -91,6 +96,7 @@ class IngestionClient:
|
|
91
96
|
self.account_client = AccountClient()
|
92
97
|
self.rse_client = RSEClient()
|
93
98
|
self.rule_client = RuleClient()
|
99
|
+
self.did_client = DIDClient()
|
94
100
|
except RucioException as e:
|
95
101
|
self.logger.error("Failed to initialize Rucio clients: %s", str(e))
|
96
102
|
raise
|
@@ -261,9 +267,10 @@ class IngestionClient:
|
|
261
267
|
return lfn
|
262
268
|
|
263
269
|
# Proceed with registering the replica if check_replica_exists returns False
|
270
|
+
valid, metadata = verify_and_extract_metadata(acada_path)
|
271
|
+
metadata["valid_fits_checksum"] = valid
|
264
272
|
|
265
|
-
# Compute file metadata
|
266
|
-
# TODO: use functions to identify file type, extract metadata, validate integrity, when this functionality is ready https://gitlab.cta-observatory.org/cta-computing/dpps/bdms/bdms/-/work_items/46
|
273
|
+
# Compute rucio file metadata
|
267
274
|
file_size = acada_path.stat().st_size
|
268
275
|
checksum = adler32(acada_path)
|
269
276
|
|
@@ -286,6 +293,10 @@ class IngestionClient:
|
|
286
293
|
)
|
287
294
|
self.logger.info("Successfully registered the replica for lfn '%s'", lfn)
|
288
295
|
|
296
|
+
if len(metadata) > 0:
|
297
|
+
self.did_client.set_metadata_bulk(scope=self.scope, name=lfn, meta=metadata)
|
298
|
+
self.logger.info("Set metadata of %r to %r", lfn, metadata)
|
299
|
+
|
289
300
|
return lfn
|
290
301
|
|
291
302
|
def add_offsite_replication_rules(
|
@@ -433,3 +444,36 @@ def verify_fits_checksum(hdul: fits.HDUList):
|
|
433
444
|
raise FITSVerificationError(msg)
|
434
445
|
elif checksum_result == 2 and pos != 0: # ignore primary for warning
|
435
446
|
LOGGER.warning("No CHECKSUM in HDU %d with name %r", pos, name)
|
447
|
+
|
448
|
+
|
449
|
+
def verify_and_extract_metadata(fits_path):
|
450
|
+
"""Verify checksums and extract metadata from FITS files.
|
451
|
+
|
452
|
+
This wrapper transforms exceptions into log errors and minimizes
|
453
|
+
the number of times the FITS file has to be opened.
|
454
|
+
"""
|
455
|
+
# this context manager allows elegant handling
|
456
|
+
# of conditionally present context managers
|
457
|
+
# which allows better handling of exceptions below
|
458
|
+
context = ExitStack()
|
459
|
+
metadata = {}
|
460
|
+
with context:
|
461
|
+
try:
|
462
|
+
hdul = context.enter_context(fits.open(fits_path))
|
463
|
+
except Exception as e:
|
464
|
+
LOGGER.error("Failed to open FITS file %r: %s", fits_path, e)
|
465
|
+
return False, metadata
|
466
|
+
|
467
|
+
try:
|
468
|
+
verify_fits_checksum(hdul)
|
469
|
+
except FITSVerificationError as e:
|
470
|
+
LOGGER.error("File %r failed FITS checksum verification: %s", fits_path, e)
|
471
|
+
return False, metadata
|
472
|
+
|
473
|
+
try:
|
474
|
+
metadata = extract_metadata_from_headers(hdul)
|
475
|
+
metadata.update(extract_metadata_from_data(fits_path))
|
476
|
+
return True, metadata
|
477
|
+
except Exception as e:
|
478
|
+
LOGGER.error("Failed to extract metadata from %r: %s", fits_path, e)
|
479
|
+
return False, metadata
|
@@ -0,0 +1,134 @@
|
|
1
|
+
"""Functions to extract metadata from input files."""
|
2
|
+
|
3
|
+
import logging
|
4
|
+
|
5
|
+
import numpy as np
|
6
|
+
from protozfits import File
|
7
|
+
|
8
|
+
# Configure logger
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
# COMMON HEADER
|
12
|
+
start_time = "DataStream.DATE"
|
13
|
+
|
14
|
+
# COMMON DATA
|
15
|
+
origin = "DataStream.ORIGIN"
|
16
|
+
sb_id = "DataStream.sb_id"
|
17
|
+
obs_id = "DataStream.obs_id"
|
18
|
+
|
19
|
+
# -- FOR TEL_TRIG
|
20
|
+
tel_ids = "DataStream.tel_ids"
|
21
|
+
|
22
|
+
# -- FOR TEL_SUB
|
23
|
+
subarray_id = "DataStream.subarray_id"
|
24
|
+
|
25
|
+
METADATA_TEL = {
|
26
|
+
"HEADER": {
|
27
|
+
"observatory": origin,
|
28
|
+
"start_time": start_time,
|
29
|
+
"end_time": "Events.DATEEND",
|
30
|
+
},
|
31
|
+
"PAYLOAD": {
|
32
|
+
"sb_id": sb_id,
|
33
|
+
"obs_id": obs_id,
|
34
|
+
},
|
35
|
+
}
|
36
|
+
|
37
|
+
METADATA_SUB = {
|
38
|
+
"HEADER": {
|
39
|
+
"observatory": origin,
|
40
|
+
"start_time": start_time,
|
41
|
+
"end_time": "SubarrayEvents.DATEEND",
|
42
|
+
},
|
43
|
+
"PAYLOAD": {
|
44
|
+
"subarray_id": subarray_id,
|
45
|
+
"sb_id": sb_id,
|
46
|
+
"obs_id": obs_id,
|
47
|
+
},
|
48
|
+
}
|
49
|
+
|
50
|
+
METADATA_TRIG = {
|
51
|
+
"HEADER": {
|
52
|
+
"observatory": origin,
|
53
|
+
"start_time": start_time,
|
54
|
+
"end_time": "Triggers.DATEEND",
|
55
|
+
},
|
56
|
+
"PAYLOAD": {
|
57
|
+
"tel_ids": tel_ids,
|
58
|
+
"sb_id": sb_id,
|
59
|
+
"obs_id": obs_id,
|
60
|
+
},
|
61
|
+
}
|
62
|
+
|
63
|
+
#: Mapping from DataStream.PBFHEAD to the metadata items we want to collect
|
64
|
+
METADATA_SCHEMAS = {
|
65
|
+
"DL0v1.Trigger.DataStream": METADATA_TRIG,
|
66
|
+
"DL0v1.Subarray.DataStream": METADATA_SUB,
|
67
|
+
"DL0v1.Telescope.DataStream": METADATA_TEL,
|
68
|
+
}
|
69
|
+
|
70
|
+
|
71
|
+
def extract_metadata_from_headers(hdul):
|
72
|
+
"""Extract metadata from FITS headers of hdul."""
|
73
|
+
all_headers = {}
|
74
|
+
for hdu in hdul:
|
75
|
+
if hdu.is_image:
|
76
|
+
continue
|
77
|
+
all_headers[hdu.name] = dict(hdu.header)
|
78
|
+
|
79
|
+
try:
|
80
|
+
all_headers["DataStream"]
|
81
|
+
except KeyError:
|
82
|
+
logger.error("No DataStream HDU found in the FITS file.")
|
83
|
+
return {}
|
84
|
+
|
85
|
+
pbfhead = all_headers["DataStream"]["PBFHEAD"]
|
86
|
+
schema = METADATA_SCHEMAS.get(pbfhead)
|
87
|
+
if schema is None:
|
88
|
+
logger.error(
|
89
|
+
"The PBFHEAD %r does not correspond to any known FITS type.", pbfhead
|
90
|
+
)
|
91
|
+
return {}
|
92
|
+
|
93
|
+
logger.debug("Headers extracted: %s", all_headers.keys())
|
94
|
+
|
95
|
+
metadata = {}
|
96
|
+
for value_name, metadata_path in schema["HEADER"].items():
|
97
|
+
extname, header_key = metadata_path.split(".")
|
98
|
+
table = all_headers[extname][header_key]
|
99
|
+
metadata[value_name] = table
|
100
|
+
|
101
|
+
return metadata
|
102
|
+
|
103
|
+
|
104
|
+
def extract_metadata_from_data(path):
|
105
|
+
"""Extract metadata from zFITS payload in path."""
|
106
|
+
with File(path) as f:
|
107
|
+
if not hasattr(f, "DataStream"):
|
108
|
+
return {}
|
109
|
+
|
110
|
+
pbfhead = f.DataStream.header["PBFHEAD"]
|
111
|
+
schema = METADATA_SCHEMAS.get(pbfhead)
|
112
|
+
if schema is None:
|
113
|
+
logger.error(
|
114
|
+
"The PBFHEAD %r does not correspond to any known FITS type.", pbfhead
|
115
|
+
)
|
116
|
+
return {}
|
117
|
+
|
118
|
+
metadata = {}
|
119
|
+
for value_name, metadata_path in schema["PAYLOAD"].items():
|
120
|
+
hdu, column = metadata_path.split(".")
|
121
|
+
row = getattr(f, hdu)[0]
|
122
|
+
metadata[value_name] = getattr(row, column)
|
123
|
+
|
124
|
+
if isinstance(metadata[value_name], np.ndarray):
|
125
|
+
# Convert numpy array to a Python list
|
126
|
+
metadata[value_name] = metadata[value_name].tolist()
|
127
|
+
|
128
|
+
logger.debug(
|
129
|
+
"Value '%s' from '%s' extracted. (renamed as '%s')",
|
130
|
+
column,
|
131
|
+
hdu,
|
132
|
+
value_name,
|
133
|
+
)
|
134
|
+
return metadata
|
bdms/tests/conftest.py
CHANGED
@@ -8,7 +8,7 @@ from secrets import token_hex
|
|
8
8
|
import pytest
|
9
9
|
from rucio.client.scopeclient import ScopeClient
|
10
10
|
|
11
|
-
from bdms.tests.utils import download_test_file
|
11
|
+
from bdms.tests.utils import download_test_file, reset_xrootd_permissions
|
12
12
|
|
13
13
|
USER_CERT = os.getenv("RUCIO_CFG_CLIENT_CERT", "/opt/rucio/etc/usercert.pem")
|
14
14
|
USER_KEY = os.getenv("RUCIO_CFG_CLIENT_KEY", "/opt/rucio/etc/userkey.pem")
|
@@ -90,5 +90,28 @@ def tel_trigger_test_file():
|
|
90
90
|
@pytest.fixture(scope="session")
|
91
91
|
def tel_events_test_file():
|
92
92
|
"""Fixture to download a telescope events test file"""
|
93
|
-
path = "acada-small/DL0/LSTN-01/ctao-n-acada/acada-adh/events/2025/02/04/
|
93
|
+
path = "acada-small/DL0/LSTN-01/ctao-n-acada/acada-adh/events/2025/02/04/TEL001_SDH0000_20250204T213354_SBID0000000002000000066_OBSID0000000002000000200_CHUNK001.fits.fz"
|
94
94
|
return download_test_file(path)
|
95
|
+
|
96
|
+
|
97
|
+
@pytest.fixture
|
98
|
+
def onsite_test_file(
|
99
|
+
storage_mount_path: Path, test_scope: str, test_vo: str
|
100
|
+
) -> tuple[Path, str]:
|
101
|
+
"""Create a dummy file in the shared storage for testing."""
|
102
|
+
|
103
|
+
unique_id = f"{datetime.now():%Y%m%d_%H%M%S}_{token_hex(8)}"
|
104
|
+
filename = f"testfile_{unique_id}.txt"
|
105
|
+
|
106
|
+
test_file_path = storage_mount_path / test_vo / test_scope / filename
|
107
|
+
test_file_path.parent.mkdir(parents=True, exist_ok=True)
|
108
|
+
|
109
|
+
# Write a small test content (simulating a .fits.fz file with minimal content for testing)
|
110
|
+
test_file_content = f"Test file with random content: {unique_id}"
|
111
|
+
test_file_path.write_text(test_file_content)
|
112
|
+
|
113
|
+
# need to change file permissions of created directories so that
|
114
|
+
# the xrootd still can read and write there
|
115
|
+
reset_xrootd_permissions(storage_mount_path)
|
116
|
+
|
117
|
+
return test_file_path, test_file_content
|
@@ -5,11 +5,9 @@ and the replication of data between Rucio storage elements (RSEs).
|
|
5
5
|
"""
|
6
6
|
|
7
7
|
import logging
|
8
|
-
import os
|
9
8
|
import subprocess
|
10
|
-
from datetime import datetime
|
11
9
|
from pathlib import Path
|
12
|
-
from secrets import token_hex
|
10
|
+
from shutil import copy2
|
13
11
|
|
14
12
|
import pytest
|
15
13
|
from astropy.io import fits
|
@@ -22,12 +20,10 @@ from rucio.common.exception import RucioException
|
|
22
20
|
from rucio.common.utils import adler32
|
23
21
|
|
24
22
|
from bdms.acada_ingestion import IngestionClient
|
25
|
-
from bdms.tests.utils import wait_for_replication_status
|
23
|
+
from bdms.tests.utils import reset_xrootd_permissions, wait_for_replication_status
|
26
24
|
|
27
25
|
LOGGER = logging.getLogger(__name__)
|
28
26
|
|
29
|
-
XROOTD_UID = 994
|
30
|
-
XROOTD_GID = 994
|
31
27
|
ONSITE_RSE = "STORAGE-1"
|
32
28
|
OFFSITE_RSE_1 = "STORAGE-2"
|
33
29
|
OFFSITE_RSE_2 = "STORAGE-3"
|
@@ -41,16 +37,6 @@ def test_shared_storage(storage_mount_path: Path):
|
|
41
37
|
), f"Shared storage {storage_mount_path} is not available on the client"
|
42
38
|
|
43
39
|
|
44
|
-
def recursive_chown(path: Path, uid: int, gid: int):
|
45
|
-
"""Equivalent of unix chmod -R <uid>:<gid> <path>."""
|
46
|
-
for root, dirs, files in os.walk(path):
|
47
|
-
root = Path(root)
|
48
|
-
for d in dirs:
|
49
|
-
os.chown(root / d, uid, gid)
|
50
|
-
for f in files:
|
51
|
-
os.chown(root / f, uid, gid)
|
52
|
-
|
53
|
-
|
54
40
|
def trigger_judge_repairer() -> None:
|
55
41
|
"""Trigger the rucio-judge-repairer daemon to run once and fix any STUCK rules."""
|
56
42
|
|
@@ -80,30 +66,6 @@ def trigger_judge_repairer() -> None:
|
|
80
66
|
raise
|
81
67
|
|
82
68
|
|
83
|
-
@pytest.fixture
|
84
|
-
def test_file(
|
85
|
-
storage_mount_path: Path, test_scope: str, test_vo: str
|
86
|
-
) -> tuple[Path, str]:
|
87
|
-
"""Create a dummy .fits.fz file in the shared storage for testing."""
|
88
|
-
|
89
|
-
unique_id = f"{datetime.now():%Y%m%d_%H%M%S}_{token_hex(8)}"
|
90
|
-
filename = f"testfile_{unique_id}.fits.fz"
|
91
|
-
|
92
|
-
test_file_path = storage_mount_path / test_vo / test_scope / filename
|
93
|
-
test_file_path.parent.mkdir(parents=True, exist_ok=True)
|
94
|
-
|
95
|
-
# need to change file permissions of created directories so that
|
96
|
-
# the xrootd still can read and write there
|
97
|
-
recursive_chown(storage_mount_path / test_vo, XROOTD_UID, XROOTD_GID)
|
98
|
-
|
99
|
-
# Write a small test content (simulating a .fits.fz file with minimal content for testing)
|
100
|
-
test_file_content = f"FITS-like content for {unique_id}"
|
101
|
-
test_file_path.write_text(test_file_content)
|
102
|
-
os.chown(test_file_path, XROOTD_UID, XROOTD_GID)
|
103
|
-
|
104
|
-
return test_file_path, test_file_content
|
105
|
-
|
106
|
-
|
107
69
|
def test_acada_to_lfn(storage_mount_path: Path, test_vo: str):
|
108
70
|
"""Test the acada_to_lfn method of IngestionClient with valid and invalid inputs."""
|
109
71
|
|
@@ -151,7 +113,10 @@ def test_acada_to_lfn(storage_mount_path: Path, test_vo: str):
|
|
151
113
|
|
152
114
|
@pytest.mark.usefixtures("_auth_proxy")
|
153
115
|
def test_check_replica_exists(
|
154
|
-
storage_mount_path: Path,
|
116
|
+
storage_mount_path: Path,
|
117
|
+
test_scope: str,
|
118
|
+
onsite_test_file: tuple[Path, str],
|
119
|
+
test_vo: str,
|
155
120
|
):
|
156
121
|
"""Test the check_replica_exists method of IngestionClient."""
|
157
122
|
|
@@ -159,7 +124,7 @@ def test_check_replica_exists(
|
|
159
124
|
storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
|
160
125
|
)
|
161
126
|
|
162
|
-
acada_path, _ = test_file
|
127
|
+
acada_path, _ = onsite_test_file
|
163
128
|
|
164
129
|
# Generate the LFN
|
165
130
|
lfn = ingestion_client.acada_to_lfn(acada_path)
|
@@ -181,10 +146,54 @@ def test_check_replica_exists(
|
|
181
146
|
assert not ingestion_client.check_replica_exists(nonexistent_lfn), msg
|
182
147
|
|
183
148
|
|
149
|
+
@pytest.fixture
|
150
|
+
def file_location(request):
|
151
|
+
return request.getfixturevalue(request.param)
|
152
|
+
|
153
|
+
|
154
|
+
@pytest.mark.parametrize(
|
155
|
+
("file_location", "metadata_dict"),
|
156
|
+
[
|
157
|
+
(
|
158
|
+
"subarray_test_file",
|
159
|
+
{
|
160
|
+
"observatory": "CTA",
|
161
|
+
"start_time": "2025-02-04T21:34:05",
|
162
|
+
"end_time": "2025-02-04T21:43:12",
|
163
|
+
"subarray_id": 0,
|
164
|
+
"sb_id": 2000000066,
|
165
|
+
"obs_id": 2000000200,
|
166
|
+
},
|
167
|
+
),
|
168
|
+
(
|
169
|
+
"tel_trigger_test_file",
|
170
|
+
{
|
171
|
+
"observatory": "CTA",
|
172
|
+
"start_time": "2025-02-04T21:34:05",
|
173
|
+
"end_time": "2025-02-04T21:43:11",
|
174
|
+
"tel_ids": [1],
|
175
|
+
"sb_id": 2000000066,
|
176
|
+
"obs_id": 2000000200,
|
177
|
+
},
|
178
|
+
),
|
179
|
+
(
|
180
|
+
"tel_events_test_file",
|
181
|
+
{
|
182
|
+
"observatory": "CTA",
|
183
|
+
"start_time": "2025-04-01T15:25:02",
|
184
|
+
"end_time": "2025-04-01T15:25:03",
|
185
|
+
"sb_id": 0,
|
186
|
+
"obs_id": 0,
|
187
|
+
},
|
188
|
+
),
|
189
|
+
],
|
190
|
+
indirect=["file_location"],
|
191
|
+
)
|
184
192
|
@pytest.mark.usefixtures("_auth_proxy")
|
185
193
|
@pytest.mark.verifies_usecase("UC-110-1.1.1")
|
186
|
-
def test_add_onsite_replica_with_dummy_file(
|
187
|
-
|
194
|
+
def test_add_onsite_replica_with_minio_fits_file(
|
195
|
+
file_location: str,
|
196
|
+
metadata_dict: dict,
|
188
197
|
test_scope: str,
|
189
198
|
tmp_path: Path,
|
190
199
|
storage_mount_path,
|
@@ -193,11 +202,16 @@ def test_add_onsite_replica_with_dummy_file(
|
|
193
202
|
):
|
194
203
|
"""Test the add_onsite_replica method of IngestionClient using a dummy file."""
|
195
204
|
|
205
|
+
filename = str(file_location).split("/")[-1]
|
206
|
+
acada_path = storage_mount_path / test_vo / test_scope / filename
|
207
|
+
acada_path.parent.mkdir(parents=True, exist_ok=True)
|
208
|
+
copy2(file_location, str(acada_path))
|
209
|
+
reset_xrootd_permissions(storage_mount_path)
|
210
|
+
|
196
211
|
ingestion_client = IngestionClient(
|
197
212
|
storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
|
198
213
|
)
|
199
214
|
|
200
|
-
acada_path, test_file_content = test_file
|
201
215
|
# Use add_onsite_replica to register the replica
|
202
216
|
lfn = ingestion_client.add_onsite_replica(acada_path=acada_path)
|
203
217
|
|
@@ -218,11 +232,9 @@ def test_add_onsite_replica_with_dummy_file(
|
|
218
232
|
download_path = tmp_path / lfn.lstrip("/")
|
219
233
|
assert download_path.is_file(), f"Download failed at {download_path}"
|
220
234
|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
f"Expected: {test_file_content}, Got: {downloaded_content}"
|
225
|
-
)
|
235
|
+
assert adler32(download_path) == adler32(
|
236
|
+
file_location
|
237
|
+
), "Downloaded file content does not match the original. "
|
226
238
|
|
227
239
|
# Check for don't ingest again if its already registered
|
228
240
|
caplog.clear()
|
@@ -231,6 +243,19 @@ def test_add_onsite_replica_with_dummy_file(
|
|
231
243
|
r.message for r in caplog.records
|
232
244
|
]
|
233
245
|
|
246
|
+
# Retrieve metadata using the DIDClient
|
247
|
+
did_client = Client()
|
248
|
+
retrieved_metadata = did_client.get_metadata(
|
249
|
+
scope=ingestion_client.scope, name=lfn, plugin="JSON"
|
250
|
+
)
|
251
|
+
|
252
|
+
# Verify the metadata matches the expected metadata
|
253
|
+
for key, value in metadata_dict.items():
|
254
|
+
assert retrieved_metadata.get(key) == value, (
|
255
|
+
f"Metadata mismatch for key '{key}'. "
|
256
|
+
f"Expected: {value}, Got: {retrieved_metadata.get(key)}"
|
257
|
+
)
|
258
|
+
|
234
259
|
|
235
260
|
def test_rses():
|
236
261
|
"""Test that the expected RSEs are configured."""
|
@@ -245,14 +270,14 @@ def test_rses():
|
|
245
270
|
|
246
271
|
@pytest.fixture
|
247
272
|
def pre_existing_lfn(
|
248
|
-
|
273
|
+
onsite_test_file: tuple[Path, str],
|
249
274
|
test_scope: str,
|
250
275
|
test_vo: str,
|
251
276
|
) -> str:
|
252
277
|
"""Fixture to provide an LFN for a replica pre-registered in Rucio without using IngestionClient."""
|
253
278
|
|
254
279
|
# Construct the LFN manually based on the test file and scope
|
255
|
-
acada_path, _ = test_file
|
280
|
+
acada_path, _ = onsite_test_file
|
256
281
|
relative_path = str(acada_path).split(f"{test_vo}/{test_scope}/", 1)[-1]
|
257
282
|
lfn = f"/{test_vo}/{test_scope}/{relative_path}"
|
258
283
|
checksum = adler32(acada_path)
|
@@ -296,7 +321,7 @@ def test_add_offsite_replication_rules(
|
|
296
321
|
test_vo: str,
|
297
322
|
storage_mount_path: Path,
|
298
323
|
tmp_path: Path,
|
299
|
-
|
324
|
+
onsite_test_file: tuple[Path, str],
|
300
325
|
caplog,
|
301
326
|
):
|
302
327
|
"""Test the add_offsite_replication_rules method of IngestionClient."""
|
@@ -309,7 +334,7 @@ def test_add_offsite_replication_rules(
|
|
309
334
|
lfn = pre_existing_lfn
|
310
335
|
did = {"scope": test_scope, "name": lfn}
|
311
336
|
|
312
|
-
_, test_file_content =
|
337
|
+
_, test_file_content = onsite_test_file # Get the test file content
|
313
338
|
|
314
339
|
offsite_rse_expression = "OFFSITE"
|
315
340
|
copies = 2
|
@@ -389,7 +414,7 @@ def test_add_offsite_replication_rules_single_copy(
|
|
389
414
|
test_vo: str,
|
390
415
|
storage_mount_path: Path,
|
391
416
|
tmp_path: Path,
|
392
|
-
|
417
|
+
onsite_test_file: tuple[Path, str],
|
393
418
|
caplog,
|
394
419
|
):
|
395
420
|
"""Test the add_offsite_replication_rules method of IngestionClient with a single copy (copies=1)."""
|
@@ -402,7 +427,7 @@ def test_add_offsite_replication_rules_single_copy(
|
|
402
427
|
lfn = pre_existing_lfn
|
403
428
|
did = {"scope": test_scope, "name": lfn}
|
404
429
|
|
405
|
-
_, test_file_content = test_file
|
430
|
+
_, test_file_content = onsite_test_file
|
406
431
|
|
407
432
|
offsite_rse_expression = "OFFSITE"
|
408
433
|
copies = 1
|
@@ -0,0 +1,97 @@
|
|
1
|
+
from astropy.io import fits
|
2
|
+
|
3
|
+
from bdms.extract_fits_metadata import (
|
4
|
+
extract_metadata_from_data,
|
5
|
+
extract_metadata_from_headers,
|
6
|
+
)
|
7
|
+
|
8
|
+
|
9
|
+
def test_extraction_correct_value_subarray_file(subarray_test_file):
|
10
|
+
"""Test the extraction of metadata from a FITS file."""
|
11
|
+
with fits.open(subarray_test_file) as hdul:
|
12
|
+
metadata_header = extract_metadata_from_headers(hdul)
|
13
|
+
|
14
|
+
metadata_payload = extract_metadata_from_data(subarray_test_file)
|
15
|
+
metadata_fits = {**metadata_header, **metadata_payload}
|
16
|
+
|
17
|
+
assert len(metadata_fits) > 0, "No metadata found in the SUBARRAY FITS"
|
18
|
+
|
19
|
+
expected_keys_in_fits_file = {
|
20
|
+
"observatory": "CTA",
|
21
|
+
"start_time": "2025-02-04T21:34:05",
|
22
|
+
"end_time": "2025-02-04T21:43:12",
|
23
|
+
"subarray_id": 0,
|
24
|
+
"sb_id": 2000000066,
|
25
|
+
"obs_id": 2000000200,
|
26
|
+
}
|
27
|
+
|
28
|
+
for key, value in expected_keys_in_fits_file.items():
|
29
|
+
assert metadata_fits[key] == value, f"Expected key '{key}' not found."
|
30
|
+
|
31
|
+
|
32
|
+
def test_extraction_correct_value_tel_trigger_file(tel_trigger_test_file):
|
33
|
+
"""Test the extraction of metadata from a FITS file."""
|
34
|
+
with fits.open(tel_trigger_test_file) as hdul:
|
35
|
+
metadata_header = extract_metadata_from_headers(hdul)
|
36
|
+
|
37
|
+
metadata_payload = extract_metadata_from_data(tel_trigger_test_file)
|
38
|
+
metadata_fits = {**metadata_header, **metadata_payload}
|
39
|
+
|
40
|
+
assert len(metadata_fits) > 0, "No metadata found in the Telescope TRIGGER FITS"
|
41
|
+
|
42
|
+
expected_keys_in_fits_file = {
|
43
|
+
"observatory": "CTA",
|
44
|
+
"start_time": "2025-02-04T21:34:05",
|
45
|
+
"end_time": "2025-02-04T21:43:11",
|
46
|
+
"tel_ids": [1],
|
47
|
+
"sb_id": 2000000066,
|
48
|
+
"obs_id": 2000000200,
|
49
|
+
}
|
50
|
+
|
51
|
+
for key, value in expected_keys_in_fits_file.items():
|
52
|
+
assert metadata_fits[key] == value, f"Expected key '{key}' not found."
|
53
|
+
|
54
|
+
|
55
|
+
def test_extraction_correct_value_tel_events_file(tel_events_test_file):
|
56
|
+
"""Test the extraction of metadata from a FITS file."""
|
57
|
+
with fits.open(tel_events_test_file) as hdul:
|
58
|
+
metadata_header = extract_metadata_from_headers(hdul)
|
59
|
+
|
60
|
+
metadata_payload = extract_metadata_from_data(tel_events_test_file)
|
61
|
+
metadata_fits = {**metadata_header, **metadata_payload}
|
62
|
+
|
63
|
+
assert len(metadata_fits) > 0, "No metadata found in the Telescope EVENTS FITS"
|
64
|
+
|
65
|
+
expected_keys_in_fits_file = {
|
66
|
+
"observatory": "CTA",
|
67
|
+
"start_time": "2025-04-01T15:25:02",
|
68
|
+
"end_time": "2025-04-01T15:25:03",
|
69
|
+
"sb_id": 0,
|
70
|
+
"obs_id": 0,
|
71
|
+
}
|
72
|
+
|
73
|
+
for key, value in expected_keys_in_fits_file.items():
|
74
|
+
assert metadata_fits[key] == value, f"Expected key '{key}' not found."
|
75
|
+
|
76
|
+
|
77
|
+
def test_extract_metadata_from_data_incorrect_header(tmp_path):
|
78
|
+
"""Test the extraction of metadata from an empty FITS file header."""
|
79
|
+
fits_file_path = tmp_path / "empty_fits.fits.fz"
|
80
|
+
hdul = fits.HDUList([fits.PrimaryHDU()])
|
81
|
+
hdul.writeto(fits_file_path, checksum=True)
|
82
|
+
|
83
|
+
with fits.open(fits_file_path) as hdul:
|
84
|
+
metadata = extract_metadata_from_headers(hdul)
|
85
|
+
|
86
|
+
assert metadata == {}, "Expected empty metadata in the header"
|
87
|
+
|
88
|
+
|
89
|
+
def test_extract_metadata_from_data_incorrect_data(tmp_path):
|
90
|
+
"""Test the extraction of metadata from an empty FITS file data."""
|
91
|
+
fits_file_path = tmp_path / "empty_fits.fits.fz"
|
92
|
+
hdul = fits.HDUList([fits.PrimaryHDU()])
|
93
|
+
hdul.writeto(fits_file_path, checksum=True)
|
94
|
+
|
95
|
+
metadata = extract_metadata_from_data(fits_file_path)
|
96
|
+
|
97
|
+
assert metadata == {}, "Expected empty metadata in the payload"
|
@@ -1,8 +1,5 @@
|
|
1
|
-
import os
|
2
1
|
import subprocess as sp
|
3
|
-
from datetime import datetime
|
4
2
|
from pathlib import Path
|
5
|
-
from secrets import token_hex
|
6
3
|
|
7
4
|
import pytest
|
8
5
|
from rucio.client.rseclient import RSEClient
|
@@ -24,41 +21,25 @@ def test_shared_storage(storage_mount_path: Path) -> Path:
|
|
24
21
|
), f"Shared storage {storage_mount_path} is not available on the client"
|
25
22
|
|
26
23
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
unique_id = f"{datetime.now():%Y%m%d_%H%M%S}_{token_hex(8)}"
|
31
|
-
test_file_name = f"/ctao.dpps.test/{test_scope}/testfile_{unique_id}.txt"
|
32
|
-
test_file_path = storage_mount_path / test_file_name.lstrip("/")
|
33
|
-
test_file_content = f"This is a test file {unique_id}"
|
34
|
-
test_file_path.parent.mkdir(parents=True, exist_ok=True)
|
35
|
-
test_file_path.write_text(test_file_content)
|
36
|
-
assert test_file_path.exists(), f"Test file {test_file_path} was not created successfully at {storage_mount_path}"
|
37
|
-
|
38
|
-
return test_file_name, test_file_content
|
39
|
-
|
40
|
-
|
41
|
-
def test_file_access_from_onsite_storage_using_gfal(test_file: tuple[Path, str]):
|
24
|
+
def test_file_access_from_onsite_storage_using_gfal(
|
25
|
+
storage_mount_path: Path, onsite_test_file: tuple[Path, str]
|
26
|
+
):
|
42
27
|
"""Verify that the file is accessible from the onsite storage pod using gfal-ls"""
|
43
|
-
|
44
|
-
|
28
|
+
test_file_path, _ = onsite_test_file
|
29
|
+
test_file_lfn = f"/{test_file_path.relative_to(storage_mount_path)}"
|
30
|
+
test_file_name = test_file_path.name
|
45
31
|
|
46
|
-
gfal_url = f"{STORAGE_PROTOCOL}://{STORAGE_HOSTNAME}/rucio{test_file_lfn}"
|
32
|
+
gfal_url = f"{STORAGE_PROTOCOL}://{STORAGE_HOSTNAME}/rucio/{test_file_lfn}"
|
47
33
|
cmd = ["gfal-ls", gfal_url]
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
)
|
58
|
-
|
59
|
-
assert any(
|
60
|
-
test_file_name in line for line in stdout.splitlines()
|
61
|
-
), f"File {test_file_name} not accessible; gfal-ls output: {stdout!r}"
|
34
|
+
|
35
|
+
ret = sp.run(cmd, capture_output=True, text=True)
|
36
|
+
stdout = ret.stdout.strip()
|
37
|
+
stderr = ret.stderr.strip()
|
38
|
+
msg = f"gfal-ls failed for {gfal_url}:\nSTDERR: {stderr}\nSTDOUT: {stderr}"
|
39
|
+
assert ret.returncode == 0, msg
|
40
|
+
|
41
|
+
msg = f"File {test_file_name} not accessible; gfal-ls output: {stdout!r}"
|
42
|
+
assert any(test_file_name in line for line in stdout.splitlines()), msg
|
62
43
|
|
63
44
|
|
64
45
|
@pytest.mark.usefixtures("_auth_proxy")
|
bdms/tests/utils.py
CHANGED
@@ -13,9 +13,27 @@ from rucio.common.exception import RucioException
|
|
13
13
|
# Default timeout and polling interval (in seconds) for waiting for replication
|
14
14
|
DEFAULT_TIMEOUT = 1000
|
15
15
|
DEFAULT_POLL_INTERVAL = 30
|
16
|
+
XROOTD_UID = int(os.getenv("XROOTD_UID", 994))
|
17
|
+
XROOTD_GID = int(os.getenv("XROOTD_GID", 994))
|
16
18
|
LOGGER = logging.getLogger(__name__)
|
17
19
|
|
18
20
|
|
21
|
+
def reset_xrootd_permissions(path):
|
22
|
+
recursive_chown(path, uid=XROOTD_UID, gid=XROOTD_GID)
|
23
|
+
|
24
|
+
|
25
|
+
def recursive_chown(path: Path, uid: int, gid: int):
|
26
|
+
"""Equivalent of unix chmod -R <uid>:<gid> <path>."""
|
27
|
+
os.chown(path, uid, gid)
|
28
|
+
|
29
|
+
for root, dirs, files in os.walk(path):
|
30
|
+
root = Path(root)
|
31
|
+
for d in dirs:
|
32
|
+
os.chown(root / d, uid, gid)
|
33
|
+
for f in files:
|
34
|
+
os.chown(root / f, uid, gid)
|
35
|
+
|
36
|
+
|
19
37
|
def wait_for_replication_status(
|
20
38
|
rule_client: RuleClient,
|
21
39
|
rule_id: str,
|
@@ -1,9 +1,9 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: ctao-bdms-clients
|
3
|
-
Version: 0.2.0rc1
|
3
|
+
Version: 0.2.1
|
4
4
|
Summary: Client module for the CTAO DPPS Bulk Data Management System
|
5
5
|
Author-email: Georgios Zacharis <georgios.zacharis@inaf.it>, Stefano Gallozzi <Stefano.gallozzi@inaf.it>, Michele Mastropietro <michele.mastropietro@inaf.it>, Syed Anwar Ul Hasan <syedanwarul.hasan@cta-consortium.org>, Maximilian Linhoff <maximilian.linhoff@cta-observatory.org>, Volodymyr Savchenko <Volodymyr.Savchenko@epfl.ch>
|
6
|
-
License: BSD-3-Clause
|
6
|
+
License-Expression: BSD-3-Clause
|
7
7
|
Project-URL: repository, https://gitlab.cta-observatory.org/cta-computing/dpps/bdms/bdms
|
8
8
|
Project-URL: documentation, http://cta-computing.gitlab-pages.cta-observatory.org/dpps/bdms/bdms
|
9
9
|
Requires-Python: >=3.9
|
@@ -12,12 +12,14 @@ License-File: LICENSE
|
|
12
12
|
Requires-Dist: astropy<8.0.0a0,>=6.0.1
|
13
13
|
Requires-Dist: ctao-bdms-rucio-policy~=0.1.0
|
14
14
|
Requires-Dist: rucio-clients~=35.7.0
|
15
|
+
Requires-Dist: protozfits>=2.7.2
|
15
16
|
Provides-Extra: test
|
16
17
|
Requires-Dist: pytest; extra == "test"
|
17
18
|
Requires-Dist: pytest-cov; extra == "test"
|
18
19
|
Requires-Dist: pytest-requirements; extra == "test"
|
19
20
|
Requires-Dist: python-dotenv; extra == "test"
|
20
21
|
Requires-Dist: minio; extra == "test"
|
22
|
+
Requires-Dist: pytest-xdist; extra == "test"
|
21
23
|
Provides-Extra: doc
|
22
24
|
Requires-Dist: sphinx; extra == "doc"
|
23
25
|
Requires-Dist: numpydoc; extra == "doc"
|
@@ -0,0 +1,20 @@
|
|
1
|
+
bdms/__init__.py,sha256=7btE6tNhFqXSv2eUhZ-0m1J3nTTs4Xo6HWcQI4eh5Do,142
|
2
|
+
bdms/_version.py,sha256=UoNvMtd4wCG76RwoSpNCUtaFyTwakGcZolfjXzNVSMY,511
|
3
|
+
bdms/acada_ingestion.py,sha256=L-LBdfd7dbSbW0poseXsZ8CbgWch8j57yaQncIemnOs,17671
|
4
|
+
bdms/extract_fits_metadata.py,sha256=ZGJQCFJCXkWg8N3CAb17GB-wwPj-wTvNg0JOS-MemZ0,3431
|
5
|
+
bdms/version.py,sha256=mTfi1WzbIs991NyImM6mcMg1R39a6U1W2pKnk-Tt5Vw,765
|
6
|
+
bdms/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
bdms/tests/conftest.py,sha256=TqMBSExgO4omUHVqoXcI1AOhS3F8de-03275IlYEW6k,3896
|
8
|
+
bdms/tests/test_acada_ingestion.py,sha256=u27Srhm8kXKtFoPr5gQdZeGDjrlECMr9ysWwGfs2w_Q,18277
|
9
|
+
bdms/tests/test_basic_rucio_functionality.py,sha256=9GIX8IO6wBJm40LKFEH2StS-fMKvC07sxFHPVR7dftU,3583
|
10
|
+
bdms/tests/test_dpps_rel_0_0.py,sha256=MnbuBoS_kUUiMcHE3-jqOzekQNUa-wcsjCJqJQ2J9S4,2957
|
11
|
+
bdms/tests/test_extract_fits_metadata.py,sha256=A935WD2TF3lBcaeDmzGSlH2IXUF1v8qslrsW30lnEAA,3490
|
12
|
+
bdms/tests/test_file_replicas.py,sha256=NqutrSJa5ME50JpmyATNPSLqq1AOq1ruv84XSY3PKLI,2635
|
13
|
+
bdms/tests/test_metadata.py,sha256=f0tSqNGlYe-ydoSDJw0k1De2kHoPl6g-GYBj_jP6kCY,3728
|
14
|
+
bdms/tests/test_onsite_storage.py,sha256=waK7t9kBquzJbuLLYcpeNU9YuA70XTRS88RMxBWxawI,3765
|
15
|
+
bdms/tests/utils.py,sha256=4g7__ms-xnTyyBKMlmV4hpC505V6uVaXJDi9XQ8UC_4,3717
|
16
|
+
ctao_bdms_clients-0.2.1.dist-info/licenses/LICENSE,sha256=Py9riZY_f0CmXbrZ5JreE3WgglyWkRnwUfqydvX6jxE,1556
|
17
|
+
ctao_bdms_clients-0.2.1.dist-info/METADATA,sha256=1LZfXzTMTtQUqn1Npr11vfQnZ2m_GUbkdnGXWRF-e3Y,2383
|
18
|
+
ctao_bdms_clients-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
19
|
+
ctao_bdms_clients-0.2.1.dist-info/top_level.txt,sha256=ao0U8aA33KRHpcqmr7yrK8y2AQ6ahSu514tfaN4hDV8,5
|
20
|
+
ctao_bdms_clients-0.2.1.dist-info/RECORD,,
|
@@ -1,18 +0,0 @@
|
|
1
|
-
bdms/__init__.py,sha256=7btE6tNhFqXSv2eUhZ-0m1J3nTTs4Xo6HWcQI4eh5Do,142
|
2
|
-
bdms/_version.py,sha256=6j6NVXRMR-dX2osPsF0-SkvP1-ofWxEz6ew_4VL2kCY,521
|
3
|
-
bdms/acada_ingestion.py,sha256=bKnXbAYvtYHYQk6ir5Sw1YIjCXGZTyk3IpZz-XGkkPo,16248
|
4
|
-
bdms/version.py,sha256=mTfi1WzbIs991NyImM6mcMg1R39a6U1W2pKnk-Tt5Vw,765
|
5
|
-
bdms/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
bdms/tests/conftest.py,sha256=lArkd8Kn7Ef19_BhqXq77taei9LKggWUu3FDUhrt9M4,3009
|
7
|
-
bdms/tests/test_acada_ingestion.py,sha256=A0G9-ssHN3dx0Jz_eIN72dQp21gfZqdQnyAgLY3BDF4,17738
|
8
|
-
bdms/tests/test_basic_rucio_functionality.py,sha256=GFUCq2QlM0M_5k5Qz9iPXPftE6nGuGYbW_IVS76T978,3604
|
9
|
-
bdms/tests/test_dpps_rel_0_0.py,sha256=MnbuBoS_kUUiMcHE3-jqOzekQNUa-wcsjCJqJQ2J9S4,2957
|
10
|
-
bdms/tests/test_file_replicas.py,sha256=NqutrSJa5ME50JpmyATNPSLqq1AOq1ruv84XSY3PKLI,2635
|
11
|
-
bdms/tests/test_metadata.py,sha256=f0tSqNGlYe-ydoSDJw0k1De2kHoPl6g-GYBj_jP6kCY,3728
|
12
|
-
bdms/tests/test_onsite_storage.py,sha256=xBwVbr2q0KHnesIrF0I8ova_hfDXDs3CBya2Sxi6VWM,4633
|
13
|
-
bdms/tests/utils.py,sha256=fh23X6iN2-lsoRBU3WSeWkweiHZlOtIUK5xzHbWyP6c,3185
|
14
|
-
ctao_bdms_clients-0.2.0rc1.dist-info/licenses/LICENSE,sha256=Py9riZY_f0CmXbrZ5JreE3WgglyWkRnwUfqydvX6jxE,1556
|
15
|
-
ctao_bdms_clients-0.2.0rc1.dist-info/METADATA,sha256=88TkbmaMgsbU1dwCRzPHKWK-yYb323BT1HqFgsQboEg,2297
|
16
|
-
ctao_bdms_clients-0.2.0rc1.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
17
|
-
ctao_bdms_clients-0.2.0rc1.dist-info/top_level.txt,sha256=ao0U8aA33KRHpcqmr7yrK8y2AQ6ahSu514tfaN4hDV8,5
|
18
|
-
ctao_bdms_clients-0.2.0rc1.dist-info/RECORD,,
|
File without changes
|
File without changes
|