ctao-bdms-clients 0.2.0rc1__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bdms/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.2.0rc1'
21
- __version_tuple__ = version_tuple = (0, 2, 0, 'rc1')
20
+ __version__ = version = '0.2.1'
21
+ __version_tuple__ = version_tuple = (0, 2, 1)
bdms/acada_ingestion.py CHANGED
@@ -4,16 +4,16 @@ This module provides the IngestionClient class to manage the ingestion of ACADA
4
4
  It includes functionality for constructing FITS file paths, converting ACADA paths to Logical File Names (LFNs),
5
5
  and registering replicas in Rucio.
6
6
  """
7
- # this is the latest working file (as on 6:40 pm) with judge repairer repairing the STUCK rule.
8
7
 
9
8
  import logging
10
9
  import os
10
+ from contextlib import ExitStack
11
11
  from pathlib import Path
12
12
  from typing import Optional, Union
13
13
 
14
14
  from astropy.io import fits
15
15
  from rucio.client.accountclient import AccountClient
16
- from rucio.client.client import Client
16
+ from rucio.client.client import Client, DIDClient
17
17
  from rucio.client.replicaclient import ReplicaClient
18
18
  from rucio.client.rseclient import RSEClient
19
19
  from rucio.client.ruleclient import RuleClient
@@ -21,6 +21,11 @@ from rucio.client.scopeclient import ScopeClient
21
21
  from rucio.common.exception import Duplicate, RucioException
22
22
  from rucio.common.utils import adler32
23
23
 
24
+ from bdms.extract_fits_metadata import (
25
+ extract_metadata_from_data,
26
+ extract_metadata_from_headers,
27
+ )
28
+
24
29
  LOGGER = logging.getLogger(__name__)
25
30
 
26
31
 
@@ -91,6 +96,7 @@ class IngestionClient:
91
96
  self.account_client = AccountClient()
92
97
  self.rse_client = RSEClient()
93
98
  self.rule_client = RuleClient()
99
+ self.did_client = DIDClient()
94
100
  except RucioException as e:
95
101
  self.logger.error("Failed to initialize Rucio clients: %s", str(e))
96
102
  raise
@@ -261,9 +267,10 @@ class IngestionClient:
261
267
  return lfn
262
268
 
263
269
  # Proceed with registering the replica if check_replica_exists returns False
270
+ valid, metadata = verify_and_extract_metadata(acada_path)
271
+ metadata["valid_fits_checksum"] = valid
264
272
 
265
- # Compute file metadata
266
- # TODO: use functions to identify file type, extract metadata, validate integrity, when this functionality is ready https://gitlab.cta-observatory.org/cta-computing/dpps/bdms/bdms/-/work_items/46
273
+ # Compute rucio file metadata
267
274
  file_size = acada_path.stat().st_size
268
275
  checksum = adler32(acada_path)
269
276
 
@@ -286,6 +293,10 @@ class IngestionClient:
286
293
  )
287
294
  self.logger.info("Successfully registered the replica for lfn '%s'", lfn)
288
295
 
296
+ if len(metadata) > 0:
297
+ self.did_client.set_metadata_bulk(scope=self.scope, name=lfn, meta=metadata)
298
+ self.logger.info("Set metadata of %r to %r", lfn, metadata)
299
+
289
300
  return lfn
290
301
 
291
302
  def add_offsite_replication_rules(
@@ -433,3 +444,36 @@ def verify_fits_checksum(hdul: fits.HDUList):
433
444
  raise FITSVerificationError(msg)
434
445
  elif checksum_result == 2 and pos != 0: # ignore primary for warning
435
446
  LOGGER.warning("No CHECKSUM in HDU %d with name %r", pos, name)
447
+
448
+
449
+ def verify_and_extract_metadata(fits_path):
450
+ """Verify checksums and extract metadata from FITS files.
451
+
452
+ This wrapper transforms exceptions into log errors and minimizes
453
+ the number of times the FITS file has to be opened.
454
+ """
455
+ # this context manager allows elegant handling
456
+ # of conditionally present context managers
457
+ # which allows better handling of exceptions below
458
+ context = ExitStack()
459
+ metadata = {}
460
+ with context:
461
+ try:
462
+ hdul = context.enter_context(fits.open(fits_path))
463
+ except Exception as e:
464
+ LOGGER.error("Failed to open FITS file %r: %s", fits_path, e)
465
+ return False, metadata
466
+
467
+ try:
468
+ verify_fits_checksum(hdul)
469
+ except FITSVerificationError as e:
470
+ LOGGER.error("File %r failed FITS checksum verification: %s", fits_path, e)
471
+ return False, metadata
472
+
473
+ try:
474
+ metadata = extract_metadata_from_headers(hdul)
475
+ metadata.update(extract_metadata_from_data(fits_path))
476
+ return True, metadata
477
+ except Exception as e:
478
+ LOGGER.error("Failed to extract metadata from %r: %s", fits_path, e)
479
+ return False, metadata
@@ -0,0 +1,134 @@
1
+ """Functions to extract metadata from input files."""
2
+
3
+ import logging
4
+
5
+ import numpy as np
6
+ from protozfits import File
7
+
8
+ # Configure logger
9
+ logger = logging.getLogger(__name__)
10
+
11
+ # COMMON HEADER
12
+ start_time = "DataStream.DATE"
13
+
14
+ # COMMON DATA
15
+ origin = "DataStream.ORIGIN"
16
+ sb_id = "DataStream.sb_id"
17
+ obs_id = "DataStream.obs_id"
18
+
19
+ # -- FOR TEL_TRIG
20
+ tel_ids = "DataStream.tel_ids"
21
+
22
+ # -- FOR TEL_SUB
23
+ subarray_id = "DataStream.subarray_id"
24
+
25
+ METADATA_TEL = {
26
+ "HEADER": {
27
+ "observatory": origin,
28
+ "start_time": start_time,
29
+ "end_time": "Events.DATEEND",
30
+ },
31
+ "PAYLOAD": {
32
+ "sb_id": sb_id,
33
+ "obs_id": obs_id,
34
+ },
35
+ }
36
+
37
+ METADATA_SUB = {
38
+ "HEADER": {
39
+ "observatory": origin,
40
+ "start_time": start_time,
41
+ "end_time": "SubarrayEvents.DATEEND",
42
+ },
43
+ "PAYLOAD": {
44
+ "subarray_id": subarray_id,
45
+ "sb_id": sb_id,
46
+ "obs_id": obs_id,
47
+ },
48
+ }
49
+
50
+ METADATA_TRIG = {
51
+ "HEADER": {
52
+ "observatory": origin,
53
+ "start_time": start_time,
54
+ "end_time": "Triggers.DATEEND",
55
+ },
56
+ "PAYLOAD": {
57
+ "tel_ids": tel_ids,
58
+ "sb_id": sb_id,
59
+ "obs_id": obs_id,
60
+ },
61
+ }
62
+
63
+ #: Mapping from DataStream.PBFHEAD to the metadata items we want to collect
64
+ METADATA_SCHEMAS = {
65
+ "DL0v1.Trigger.DataStream": METADATA_TRIG,
66
+ "DL0v1.Subarray.DataStream": METADATA_SUB,
67
+ "DL0v1.Telescope.DataStream": METADATA_TEL,
68
+ }
69
+
70
+
71
+ def extract_metadata_from_headers(hdul):
72
+ """Extract metadata from FITS headers of hdul."""
73
+ all_headers = {}
74
+ for hdu in hdul:
75
+ if hdu.is_image:
76
+ continue
77
+ all_headers[hdu.name] = dict(hdu.header)
78
+
79
+ try:
80
+ all_headers["DataStream"]
81
+ except KeyError:
82
+ logger.error("No DataStream HDU found in the FITS file.")
83
+ return {}
84
+
85
+ pbfhead = all_headers["DataStream"]["PBFHEAD"]
86
+ schema = METADATA_SCHEMAS.get(pbfhead)
87
+ if schema is None:
88
+ logger.error(
89
+ "The PBFHEAD %r does not correspond to any known FITS type.", pbfhead
90
+ )
91
+ return {}
92
+
93
+ logger.debug("Headers extracted: %s", all_headers.keys())
94
+
95
+ metadata = {}
96
+ for value_name, metadata_path in schema["HEADER"].items():
97
+ extname, header_key = metadata_path.split(".")
98
+ table = all_headers[extname][header_key]
99
+ metadata[value_name] = table
100
+
101
+ return metadata
102
+
103
+
104
+ def extract_metadata_from_data(path):
105
+ """Extract metadata from zFITS payload in path."""
106
+ with File(path) as f:
107
+ if not hasattr(f, "DataStream"):
108
+ return {}
109
+
110
+ pbfhead = f.DataStream.header["PBFHEAD"]
111
+ schema = METADATA_SCHEMAS.get(pbfhead)
112
+ if schema is None:
113
+ logger.error(
114
+ "The PBFHEAD %r does not correspond to any known FITS type.", pbfhead
115
+ )
116
+ return {}
117
+
118
+ metadata = {}
119
+ for value_name, metadata_path in schema["PAYLOAD"].items():
120
+ hdu, column = metadata_path.split(".")
121
+ row = getattr(f, hdu)[0]
122
+ metadata[value_name] = getattr(row, column)
123
+
124
+ if isinstance(metadata[value_name], np.ndarray):
125
+ # Convert numpy array to a Python list
126
+ metadata[value_name] = metadata[value_name].tolist()
127
+
128
+ logger.debug(
129
+ "Value '%s' from '%s' extracted. (renamed as '%s')",
130
+ column,
131
+ hdu,
132
+ value_name,
133
+ )
134
+ return metadata
bdms/tests/conftest.py CHANGED
@@ -8,7 +8,7 @@ from secrets import token_hex
8
8
  import pytest
9
9
  from rucio.client.scopeclient import ScopeClient
10
10
 
11
- from bdms.tests.utils import download_test_file
11
+ from bdms.tests.utils import download_test_file, reset_xrootd_permissions
12
12
 
13
13
  USER_CERT = os.getenv("RUCIO_CFG_CLIENT_CERT", "/opt/rucio/etc/usercert.pem")
14
14
  USER_KEY = os.getenv("RUCIO_CFG_CLIENT_KEY", "/opt/rucio/etc/userkey.pem")
@@ -90,5 +90,28 @@ def tel_trigger_test_file():
90
90
  @pytest.fixture(scope="session")
91
91
  def tel_events_test_file():
92
92
  """Fixture to download a telescope events test file"""
93
- path = "acada-small/DL0/LSTN-01/ctao-n-acada/acada-adh/events/2025/02/04/TEL001_SDH0000_20250204T213354_SBID0000000002000000066_OBSID0000000002000000200_CHUNK000.fits.fz"
93
+ path = "acada-small/DL0/LSTN-01/ctao-n-acada/acada-adh/events/2025/02/04/TEL001_SDH0000_20250204T213354_SBID0000000002000000066_OBSID0000000002000000200_CHUNK001.fits.fz"
94
94
  return download_test_file(path)
95
+
96
+
97
+ @pytest.fixture
98
+ def onsite_test_file(
99
+ storage_mount_path: Path, test_scope: str, test_vo: str
100
+ ) -> tuple[Path, str]:
101
+ """Create a dummy file in the shared storage for testing."""
102
+
103
+ unique_id = f"{datetime.now():%Y%m%d_%H%M%S}_{token_hex(8)}"
104
+ filename = f"testfile_{unique_id}.txt"
105
+
106
+ test_file_path = storage_mount_path / test_vo / test_scope / filename
107
+ test_file_path.parent.mkdir(parents=True, exist_ok=True)
108
+
109
+ # Write a small test content (simulating a .fits.fz file with minimal content for testing)
110
+ test_file_content = f"Test file with random content: {unique_id}"
111
+ test_file_path.write_text(test_file_content)
112
+
113
+ # need to change file permissions of created directories so that
114
+ # the xrootd still can read and write there
115
+ reset_xrootd_permissions(storage_mount_path)
116
+
117
+ return test_file_path, test_file_content
@@ -5,11 +5,9 @@ and the replication of data between Rucio storage elements (RSEs).
5
5
  """
6
6
 
7
7
  import logging
8
- import os
9
8
  import subprocess
10
- from datetime import datetime
11
9
  from pathlib import Path
12
- from secrets import token_hex
10
+ from shutil import copy2
13
11
 
14
12
  import pytest
15
13
  from astropy.io import fits
@@ -22,12 +20,10 @@ from rucio.common.exception import RucioException
22
20
  from rucio.common.utils import adler32
23
21
 
24
22
  from bdms.acada_ingestion import IngestionClient
25
- from bdms.tests.utils import wait_for_replication_status
23
+ from bdms.tests.utils import reset_xrootd_permissions, wait_for_replication_status
26
24
 
27
25
  LOGGER = logging.getLogger(__name__)
28
26
 
29
- XROOTD_UID = 994
30
- XROOTD_GID = 994
31
27
  ONSITE_RSE = "STORAGE-1"
32
28
  OFFSITE_RSE_1 = "STORAGE-2"
33
29
  OFFSITE_RSE_2 = "STORAGE-3"
@@ -41,16 +37,6 @@ def test_shared_storage(storage_mount_path: Path):
41
37
  ), f"Shared storage {storage_mount_path} is not available on the client"
42
38
 
43
39
 
44
- def recursive_chown(path: Path, uid: int, gid: int):
45
- """Equivalent of unix chmod -R <uid>:<gid> <path>."""
46
- for root, dirs, files in os.walk(path):
47
- root = Path(root)
48
- for d in dirs:
49
- os.chown(root / d, uid, gid)
50
- for f in files:
51
- os.chown(root / f, uid, gid)
52
-
53
-
54
40
  def trigger_judge_repairer() -> None:
55
41
  """Trigger the rucio-judge-repairer daemon to run once and fix any STUCK rules."""
56
42
 
@@ -80,30 +66,6 @@ def trigger_judge_repairer() -> None:
80
66
  raise
81
67
 
82
68
 
83
- @pytest.fixture
84
- def test_file(
85
- storage_mount_path: Path, test_scope: str, test_vo: str
86
- ) -> tuple[Path, str]:
87
- """Create a dummy .fits.fz file in the shared storage for testing."""
88
-
89
- unique_id = f"{datetime.now():%Y%m%d_%H%M%S}_{token_hex(8)}"
90
- filename = f"testfile_{unique_id}.fits.fz"
91
-
92
- test_file_path = storage_mount_path / test_vo / test_scope / filename
93
- test_file_path.parent.mkdir(parents=True, exist_ok=True)
94
-
95
- # need to change file permissions of created directories so that
96
- # the xrootd still can read and write there
97
- recursive_chown(storage_mount_path / test_vo, XROOTD_UID, XROOTD_GID)
98
-
99
- # Write a small test content (simulating a .fits.fz file with minimal content for testing)
100
- test_file_content = f"FITS-like content for {unique_id}"
101
- test_file_path.write_text(test_file_content)
102
- os.chown(test_file_path, XROOTD_UID, XROOTD_GID)
103
-
104
- return test_file_path, test_file_content
105
-
106
-
107
69
  def test_acada_to_lfn(storage_mount_path: Path, test_vo: str):
108
70
  """Test the acada_to_lfn method of IngestionClient with valid and invalid inputs."""
109
71
 
@@ -151,7 +113,10 @@ def test_acada_to_lfn(storage_mount_path: Path, test_vo: str):
151
113
 
152
114
  @pytest.mark.usefixtures("_auth_proxy")
153
115
  def test_check_replica_exists(
154
- storage_mount_path: Path, test_scope: str, test_file: tuple[Path, str], test_vo: str
116
+ storage_mount_path: Path,
117
+ test_scope: str,
118
+ onsite_test_file: tuple[Path, str],
119
+ test_vo: str,
155
120
  ):
156
121
  """Test the check_replica_exists method of IngestionClient."""
157
122
 
@@ -159,7 +124,7 @@ def test_check_replica_exists(
159
124
  storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
160
125
  )
161
126
 
162
- acada_path, _ = test_file
127
+ acada_path, _ = onsite_test_file
163
128
 
164
129
  # Generate the LFN
165
130
  lfn = ingestion_client.acada_to_lfn(acada_path)
@@ -181,10 +146,54 @@ def test_check_replica_exists(
181
146
  assert not ingestion_client.check_replica_exists(nonexistent_lfn), msg
182
147
 
183
148
 
149
+ @pytest.fixture
150
+ def file_location(request):
151
+ return request.getfixturevalue(request.param)
152
+
153
+
154
+ @pytest.mark.parametrize(
155
+ ("file_location", "metadata_dict"),
156
+ [
157
+ (
158
+ "subarray_test_file",
159
+ {
160
+ "observatory": "CTA",
161
+ "start_time": "2025-02-04T21:34:05",
162
+ "end_time": "2025-02-04T21:43:12",
163
+ "subarray_id": 0,
164
+ "sb_id": 2000000066,
165
+ "obs_id": 2000000200,
166
+ },
167
+ ),
168
+ (
169
+ "tel_trigger_test_file",
170
+ {
171
+ "observatory": "CTA",
172
+ "start_time": "2025-02-04T21:34:05",
173
+ "end_time": "2025-02-04T21:43:11",
174
+ "tel_ids": [1],
175
+ "sb_id": 2000000066,
176
+ "obs_id": 2000000200,
177
+ },
178
+ ),
179
+ (
180
+ "tel_events_test_file",
181
+ {
182
+ "observatory": "CTA",
183
+ "start_time": "2025-04-01T15:25:02",
184
+ "end_time": "2025-04-01T15:25:03",
185
+ "sb_id": 0,
186
+ "obs_id": 0,
187
+ },
188
+ ),
189
+ ],
190
+ indirect=["file_location"],
191
+ )
184
192
  @pytest.mark.usefixtures("_auth_proxy")
185
193
  @pytest.mark.verifies_usecase("UC-110-1.1.1")
186
- def test_add_onsite_replica_with_dummy_file(
187
- test_file: tuple[Path, str],
194
+ def test_add_onsite_replica_with_minio_fits_file(
195
+ file_location: str,
196
+ metadata_dict: dict,
188
197
  test_scope: str,
189
198
  tmp_path: Path,
190
199
  storage_mount_path,
@@ -193,11 +202,16 @@ def test_add_onsite_replica_with_dummy_file(
193
202
  ):
194
203
  """Test the add_onsite_replica method of IngestionClient using a dummy file."""
195
204
 
205
+ filename = str(file_location).split("/")[-1]
206
+ acada_path = storage_mount_path / test_vo / test_scope / filename
207
+ acada_path.parent.mkdir(parents=True, exist_ok=True)
208
+ copy2(file_location, str(acada_path))
209
+ reset_xrootd_permissions(storage_mount_path)
210
+
196
211
  ingestion_client = IngestionClient(
197
212
  storage_mount_path, ONSITE_RSE, scope=test_scope, vo=test_vo
198
213
  )
199
214
 
200
- acada_path, test_file_content = test_file
201
215
  # Use add_onsite_replica to register the replica
202
216
  lfn = ingestion_client.add_onsite_replica(acada_path=acada_path)
203
217
 
@@ -218,11 +232,9 @@ def test_add_onsite_replica_with_dummy_file(
218
232
  download_path = tmp_path / lfn.lstrip("/")
219
233
  assert download_path.is_file(), f"Download failed at {download_path}"
220
234
 
221
- downloaded_content = download_path.read_text()
222
- assert downloaded_content == test_file_content, (
223
- f"Downloaded file content does not match the original. "
224
- f"Expected: {test_file_content}, Got: {downloaded_content}"
225
- )
235
+ assert adler32(download_path) == adler32(
236
+ file_location
237
+ ), "Downloaded file content does not match the original. "
226
238
 
227
239
  # Check for don't ingest again if its already registered
228
240
  caplog.clear()
@@ -231,6 +243,19 @@ def test_add_onsite_replica_with_dummy_file(
231
243
  r.message for r in caplog.records
232
244
  ]
233
245
 
246
+ # Retrieve metadata using the DIDClient
247
+ did_client = Client()
248
+ retrieved_metadata = did_client.get_metadata(
249
+ scope=ingestion_client.scope, name=lfn, plugin="JSON"
250
+ )
251
+
252
+ # Verify the metadata matches the expected metadata
253
+ for key, value in metadata_dict.items():
254
+ assert retrieved_metadata.get(key) == value, (
255
+ f"Metadata mismatch for key '{key}'. "
256
+ f"Expected: {value}, Got: {retrieved_metadata.get(key)}"
257
+ )
258
+
234
259
 
235
260
  def test_rses():
236
261
  """Test that the expected RSEs are configured."""
@@ -245,14 +270,14 @@ def test_rses():
245
270
 
246
271
  @pytest.fixture
247
272
  def pre_existing_lfn(
248
- test_file: tuple[Path, str],
273
+ onsite_test_file: tuple[Path, str],
249
274
  test_scope: str,
250
275
  test_vo: str,
251
276
  ) -> str:
252
277
  """Fixture to provide an LFN for a replica pre-registered in Rucio without using IngestionClient."""
253
278
 
254
279
  # Construct the LFN manually based on the test file and scope
255
- acada_path, _ = test_file
280
+ acada_path, _ = onsite_test_file
256
281
  relative_path = str(acada_path).split(f"{test_vo}/{test_scope}/", 1)[-1]
257
282
  lfn = f"/{test_vo}/{test_scope}/{relative_path}"
258
283
  checksum = adler32(acada_path)
@@ -296,7 +321,7 @@ def test_add_offsite_replication_rules(
296
321
  test_vo: str,
297
322
  storage_mount_path: Path,
298
323
  tmp_path: Path,
299
- test_file: tuple[Path, str],
324
+ onsite_test_file: tuple[Path, str],
300
325
  caplog,
301
326
  ):
302
327
  """Test the add_offsite_replication_rules method of IngestionClient."""
@@ -309,7 +334,7 @@ def test_add_offsite_replication_rules(
309
334
  lfn = pre_existing_lfn
310
335
  did = {"scope": test_scope, "name": lfn}
311
336
 
312
- _, test_file_content = test_file # Get the test file content
337
+ _, test_file_content = onsite_test_file # Get the test file content
313
338
 
314
339
  offsite_rse_expression = "OFFSITE"
315
340
  copies = 2
@@ -389,7 +414,7 @@ def test_add_offsite_replication_rules_single_copy(
389
414
  test_vo: str,
390
415
  storage_mount_path: Path,
391
416
  tmp_path: Path,
392
- test_file: tuple[Path, str],
417
+ onsite_test_file: tuple[Path, str],
393
418
  caplog,
394
419
  ):
395
420
  """Test the add_offsite_replication_rules method of IngestionClient with a single copy (copies=1)."""
@@ -402,7 +427,7 @@ def test_add_offsite_replication_rules_single_copy(
402
427
  lfn = pre_existing_lfn
403
428
  did = {"scope": test_scope, "name": lfn}
404
429
 
405
- _, test_file_content = test_file
430
+ _, test_file_content = onsite_test_file
406
431
 
407
432
  offsite_rse_expression = "OFFSITE"
408
433
  copies = 1
@@ -124,7 +124,6 @@ def test_replication(test_vo, test_scope, tmp_path):
124
124
  rule_client,
125
125
  rule_id=rule,
126
126
  expected_status="OK",
127
- timeout=600,
128
127
  poll_interval=5,
129
128
  )
130
129
  replicas = next(replica_client.list_replicas(dids))
@@ -0,0 +1,97 @@
1
+ from astropy.io import fits
2
+
3
+ from bdms.extract_fits_metadata import (
4
+ extract_metadata_from_data,
5
+ extract_metadata_from_headers,
6
+ )
7
+
8
+
9
+ def test_extraction_correct_value_subarray_file(subarray_test_file):
10
+ """Test the extraction of metadata from a FITS file."""
11
+ with fits.open(subarray_test_file) as hdul:
12
+ metadata_header = extract_metadata_from_headers(hdul)
13
+
14
+ metadata_payload = extract_metadata_from_data(subarray_test_file)
15
+ metadata_fits = {**metadata_header, **metadata_payload}
16
+
17
+ assert len(metadata_fits) > 0, "No metadata found in the SUBARRAY FITS"
18
+
19
+ expected_keys_in_fits_file = {
20
+ "observatory": "CTA",
21
+ "start_time": "2025-02-04T21:34:05",
22
+ "end_time": "2025-02-04T21:43:12",
23
+ "subarray_id": 0,
24
+ "sb_id": 2000000066,
25
+ "obs_id": 2000000200,
26
+ }
27
+
28
+ for key, value in expected_keys_in_fits_file.items():
29
+ assert metadata_fits[key] == value, f"Expected key '{key}' not found."
30
+
31
+
32
+ def test_extraction_correct_value_tel_trigger_file(tel_trigger_test_file):
33
+ """Test the extraction of metadata from a FITS file."""
34
+ with fits.open(tel_trigger_test_file) as hdul:
35
+ metadata_header = extract_metadata_from_headers(hdul)
36
+
37
+ metadata_payload = extract_metadata_from_data(tel_trigger_test_file)
38
+ metadata_fits = {**metadata_header, **metadata_payload}
39
+
40
+ assert len(metadata_fits) > 0, "No metadata found in the Telescope TRIGGER FITS"
41
+
42
+ expected_keys_in_fits_file = {
43
+ "observatory": "CTA",
44
+ "start_time": "2025-02-04T21:34:05",
45
+ "end_time": "2025-02-04T21:43:11",
46
+ "tel_ids": [1],
47
+ "sb_id": 2000000066,
48
+ "obs_id": 2000000200,
49
+ }
50
+
51
+ for key, value in expected_keys_in_fits_file.items():
52
+ assert metadata_fits[key] == value, f"Expected key '{key}' not found."
53
+
54
+
55
+ def test_extraction_correct_value_tel_events_file(tel_events_test_file):
56
+ """Test the extraction of metadata from a FITS file."""
57
+ with fits.open(tel_events_test_file) as hdul:
58
+ metadata_header = extract_metadata_from_headers(hdul)
59
+
60
+ metadata_payload = extract_metadata_from_data(tel_events_test_file)
61
+ metadata_fits = {**metadata_header, **metadata_payload}
62
+
63
+ assert len(metadata_fits) > 0, "No metadata found in the Telescope EVENTS FITS"
64
+
65
+ expected_keys_in_fits_file = {
66
+ "observatory": "CTA",
67
+ "start_time": "2025-04-01T15:25:02",
68
+ "end_time": "2025-04-01T15:25:03",
69
+ "sb_id": 0,
70
+ "obs_id": 0,
71
+ }
72
+
73
+ for key, value in expected_keys_in_fits_file.items():
74
+ assert metadata_fits[key] == value, f"Expected key '{key}' not found."
75
+
76
+
77
+ def test_extract_metadata_from_data_incorrect_header(tmp_path):
78
+ """Test the extraction of metadata from an empty FITS file header."""
79
+ fits_file_path = tmp_path / "empty_fits.fits.fz"
80
+ hdul = fits.HDUList([fits.PrimaryHDU()])
81
+ hdul.writeto(fits_file_path, checksum=True)
82
+
83
+ with fits.open(fits_file_path) as hdul:
84
+ metadata = extract_metadata_from_headers(hdul)
85
+
86
+ assert metadata == {}, "Expected empty metadata in the header"
87
+
88
+
89
+ def test_extract_metadata_from_data_incorrect_data(tmp_path):
90
+ """Test the extraction of metadata from an empty FITS file data."""
91
+ fits_file_path = tmp_path / "empty_fits.fits.fz"
92
+ hdul = fits.HDUList([fits.PrimaryHDU()])
93
+ hdul.writeto(fits_file_path, checksum=True)
94
+
95
+ metadata = extract_metadata_from_data(fits_file_path)
96
+
97
+ assert metadata == {}, "Expected empty metadata in the payload"
@@ -1,8 +1,5 @@
1
- import os
2
1
  import subprocess as sp
3
- from datetime import datetime
4
2
  from pathlib import Path
5
- from secrets import token_hex
6
3
 
7
4
  import pytest
8
5
  from rucio.client.rseclient import RSEClient
@@ -24,41 +21,25 @@ def test_shared_storage(storage_mount_path: Path) -> Path:
24
21
  ), f"Shared storage {storage_mount_path} is not available on the client"
25
22
 
26
23
 
27
- @pytest.fixture(scope="session")
28
- def test_file(storage_mount_path, test_scope) -> tuple[Path, str]:
29
- """Create a test file in the shared storage and return its path and content"""
30
- unique_id = f"{datetime.now():%Y%m%d_%H%M%S}_{token_hex(8)}"
31
- test_file_name = f"/ctao.dpps.test/{test_scope}/testfile_{unique_id}.txt"
32
- test_file_path = storage_mount_path / test_file_name.lstrip("/")
33
- test_file_content = f"This is a test file {unique_id}"
34
- test_file_path.parent.mkdir(parents=True, exist_ok=True)
35
- test_file_path.write_text(test_file_content)
36
- assert test_file_path.exists(), f"Test file {test_file_path} was not created successfully at {storage_mount_path}"
37
-
38
- return test_file_name, test_file_content
39
-
40
-
41
- def test_file_access_from_onsite_storage_using_gfal(test_file: tuple[Path, str]):
24
+ def test_file_access_from_onsite_storage_using_gfal(
25
+ storage_mount_path: Path, onsite_test_file: tuple[Path, str]
26
+ ):
42
27
  """Verify that the file is accessible from the onsite storage pod using gfal-ls"""
43
- test_file_lfn, _ = test_file
44
- test_file_name = os.path.basename(test_file_lfn)
28
+ test_file_path, _ = onsite_test_file
29
+ test_file_lfn = f"/{test_file_path.relative_to(storage_mount_path)}"
30
+ test_file_name = test_file_path.name
45
31
 
46
- gfal_url = f"{STORAGE_PROTOCOL}://{STORAGE_HOSTNAME}/rucio{test_file_lfn}"
32
+ gfal_url = f"{STORAGE_PROTOCOL}://{STORAGE_HOSTNAME}/rucio/{test_file_lfn}"
47
33
  cmd = ["gfal-ls", gfal_url]
48
- try:
49
- output = sp.run(cmd, capture_output=True, text=True, check=True)
50
- debug = True # Adjust as needed
51
- if debug:
52
- print(f"GFAL Output: {output.stdout.strip()}")
53
- stdout = output.stdout.strip()
54
- except sp.CalledProcessError as e:
55
- pytest.fail(
56
- f"gfal-ls failed for {gfal_url}:\nSTDERR: {e.stderr.strip()}\nSTDOUT: {e.stdout.strip()}"
57
- )
58
-
59
- assert any(
60
- test_file_name in line for line in stdout.splitlines()
61
- ), f"File {test_file_name} not accessible; gfal-ls output: {stdout!r}"
34
+
35
+ ret = sp.run(cmd, capture_output=True, text=True)
36
+ stdout = ret.stdout.strip()
37
+ stderr = ret.stderr.strip()
38
+ msg = f"gfal-ls failed for {gfal_url}:\nSTDERR: {stderr}\nSTDOUT: {stderr}"
39
+ assert ret.returncode == 0, msg
40
+
41
+ msg = f"File {test_file_name} not accessible; gfal-ls output: {stdout!r}"
42
+ assert any(test_file_name in line for line in stdout.splitlines()), msg
62
43
 
63
44
 
64
45
  @pytest.mark.usefixtures("_auth_proxy")
bdms/tests/utils.py CHANGED
@@ -13,9 +13,27 @@ from rucio.common.exception import RucioException
13
13
  # Default timeout and polling interval (in seconds) for waiting for replication
14
14
  DEFAULT_TIMEOUT = 1000
15
15
  DEFAULT_POLL_INTERVAL = 30
16
+ XROOTD_UID = int(os.getenv("XROOTD_UID", 994))
17
+ XROOTD_GID = int(os.getenv("XROOTD_GID", 994))
16
18
  LOGGER = logging.getLogger(__name__)
17
19
 
18
20
 
21
+ def reset_xrootd_permissions(path):
22
+ recursive_chown(path, uid=XROOTD_UID, gid=XROOTD_GID)
23
+
24
+
25
+ def recursive_chown(path: Path, uid: int, gid: int):
26
+ """Equivalent of unix chmod -R <uid>:<gid> <path>."""
27
+ os.chown(path, uid, gid)
28
+
29
+ for root, dirs, files in os.walk(path):
30
+ root = Path(root)
31
+ for d in dirs:
32
+ os.chown(root / d, uid, gid)
33
+ for f in files:
34
+ os.chown(root / f, uid, gid)
35
+
36
+
19
37
  def wait_for_replication_status(
20
38
  rule_client: RuleClient,
21
39
  rule_id: str,
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ctao-bdms-clients
3
- Version: 0.2.0rc1
3
+ Version: 0.2.1
4
4
  Summary: Client module for the CTAO DPPS Bulk Data Management System
5
5
  Author-email: Georgios Zacharis <georgios.zacharis@inaf.it>, Stefano Gallozzi <Stefano.gallozzi@inaf.it>, Michele Mastropietro <michele.mastropietro@inaf.it>, Syed Anwar Ul Hasan <syedanwarul.hasan@cta-consortium.org>, Maximilian Linhoff <maximilian.linhoff@cta-observatory.org>, Volodymyr Savchenko <Volodymyr.Savchenko@epfl.ch>
6
- License: BSD-3-Clause
6
+ License-Expression: BSD-3-Clause
7
7
  Project-URL: repository, https://gitlab.cta-observatory.org/cta-computing/dpps/bdms/bdms
8
8
  Project-URL: documentation, http://cta-computing.gitlab-pages.cta-observatory.org/dpps/bdms/bdms
9
9
  Requires-Python: >=3.9
@@ -12,12 +12,14 @@ License-File: LICENSE
12
12
  Requires-Dist: astropy<8.0.0a0,>=6.0.1
13
13
  Requires-Dist: ctao-bdms-rucio-policy~=0.1.0
14
14
  Requires-Dist: rucio-clients~=35.7.0
15
+ Requires-Dist: protozfits>=2.7.2
15
16
  Provides-Extra: test
16
17
  Requires-Dist: pytest; extra == "test"
17
18
  Requires-Dist: pytest-cov; extra == "test"
18
19
  Requires-Dist: pytest-requirements; extra == "test"
19
20
  Requires-Dist: python-dotenv; extra == "test"
20
21
  Requires-Dist: minio; extra == "test"
22
+ Requires-Dist: pytest-xdist; extra == "test"
21
23
  Provides-Extra: doc
22
24
  Requires-Dist: sphinx; extra == "doc"
23
25
  Requires-Dist: numpydoc; extra == "doc"
@@ -0,0 +1,20 @@
1
+ bdms/__init__.py,sha256=7btE6tNhFqXSv2eUhZ-0m1J3nTTs4Xo6HWcQI4eh5Do,142
2
+ bdms/_version.py,sha256=UoNvMtd4wCG76RwoSpNCUtaFyTwakGcZolfjXzNVSMY,511
3
+ bdms/acada_ingestion.py,sha256=L-LBdfd7dbSbW0poseXsZ8CbgWch8j57yaQncIemnOs,17671
4
+ bdms/extract_fits_metadata.py,sha256=ZGJQCFJCXkWg8N3CAb17GB-wwPj-wTvNg0JOS-MemZ0,3431
5
+ bdms/version.py,sha256=mTfi1WzbIs991NyImM6mcMg1R39a6U1W2pKnk-Tt5Vw,765
6
+ bdms/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ bdms/tests/conftest.py,sha256=TqMBSExgO4omUHVqoXcI1AOhS3F8de-03275IlYEW6k,3896
8
+ bdms/tests/test_acada_ingestion.py,sha256=u27Srhm8kXKtFoPr5gQdZeGDjrlECMr9ysWwGfs2w_Q,18277
9
+ bdms/tests/test_basic_rucio_functionality.py,sha256=9GIX8IO6wBJm40LKFEH2StS-fMKvC07sxFHPVR7dftU,3583
10
+ bdms/tests/test_dpps_rel_0_0.py,sha256=MnbuBoS_kUUiMcHE3-jqOzekQNUa-wcsjCJqJQ2J9S4,2957
11
+ bdms/tests/test_extract_fits_metadata.py,sha256=A935WD2TF3lBcaeDmzGSlH2IXUF1v8qslrsW30lnEAA,3490
12
+ bdms/tests/test_file_replicas.py,sha256=NqutrSJa5ME50JpmyATNPSLqq1AOq1ruv84XSY3PKLI,2635
13
+ bdms/tests/test_metadata.py,sha256=f0tSqNGlYe-ydoSDJw0k1De2kHoPl6g-GYBj_jP6kCY,3728
14
+ bdms/tests/test_onsite_storage.py,sha256=waK7t9kBquzJbuLLYcpeNU9YuA70XTRS88RMxBWxawI,3765
15
+ bdms/tests/utils.py,sha256=4g7__ms-xnTyyBKMlmV4hpC505V6uVaXJDi9XQ8UC_4,3717
16
+ ctao_bdms_clients-0.2.1.dist-info/licenses/LICENSE,sha256=Py9riZY_f0CmXbrZ5JreE3WgglyWkRnwUfqydvX6jxE,1556
17
+ ctao_bdms_clients-0.2.1.dist-info/METADATA,sha256=1LZfXzTMTtQUqn1Npr11vfQnZ2m_GUbkdnGXWRF-e3Y,2383
18
+ ctao_bdms_clients-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
+ ctao_bdms_clients-0.2.1.dist-info/top_level.txt,sha256=ao0U8aA33KRHpcqmr7yrK8y2AQ6ahSu514tfaN4hDV8,5
20
+ ctao_bdms_clients-0.2.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.3.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,18 +0,0 @@
1
- bdms/__init__.py,sha256=7btE6tNhFqXSv2eUhZ-0m1J3nTTs4Xo6HWcQI4eh5Do,142
2
- bdms/_version.py,sha256=6j6NVXRMR-dX2osPsF0-SkvP1-ofWxEz6ew_4VL2kCY,521
3
- bdms/acada_ingestion.py,sha256=bKnXbAYvtYHYQk6ir5Sw1YIjCXGZTyk3IpZz-XGkkPo,16248
4
- bdms/version.py,sha256=mTfi1WzbIs991NyImM6mcMg1R39a6U1W2pKnk-Tt5Vw,765
5
- bdms/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- bdms/tests/conftest.py,sha256=lArkd8Kn7Ef19_BhqXq77taei9LKggWUu3FDUhrt9M4,3009
7
- bdms/tests/test_acada_ingestion.py,sha256=A0G9-ssHN3dx0Jz_eIN72dQp21gfZqdQnyAgLY3BDF4,17738
8
- bdms/tests/test_basic_rucio_functionality.py,sha256=GFUCq2QlM0M_5k5Qz9iPXPftE6nGuGYbW_IVS76T978,3604
9
- bdms/tests/test_dpps_rel_0_0.py,sha256=MnbuBoS_kUUiMcHE3-jqOzekQNUa-wcsjCJqJQ2J9S4,2957
10
- bdms/tests/test_file_replicas.py,sha256=NqutrSJa5ME50JpmyATNPSLqq1AOq1ruv84XSY3PKLI,2635
11
- bdms/tests/test_metadata.py,sha256=f0tSqNGlYe-ydoSDJw0k1De2kHoPl6g-GYBj_jP6kCY,3728
12
- bdms/tests/test_onsite_storage.py,sha256=xBwVbr2q0KHnesIrF0I8ova_hfDXDs3CBya2Sxi6VWM,4633
13
- bdms/tests/utils.py,sha256=fh23X6iN2-lsoRBU3WSeWkweiHZlOtIUK5xzHbWyP6c,3185
14
- ctao_bdms_clients-0.2.0rc1.dist-info/licenses/LICENSE,sha256=Py9riZY_f0CmXbrZ5JreE3WgglyWkRnwUfqydvX6jxE,1556
15
- ctao_bdms_clients-0.2.0rc1.dist-info/METADATA,sha256=88TkbmaMgsbU1dwCRzPHKWK-yYb323BT1HqFgsQboEg,2297
16
- ctao_bdms_clients-0.2.0rc1.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
17
- ctao_bdms_clients-0.2.0rc1.dist-info/top_level.txt,sha256=ao0U8aA33KRHpcqmr7yrK8y2AQ6ahSu514tfaN4hDV8,5
18
- ctao_bdms_clients-0.2.0rc1.dist-info/RECORD,,