ctao-bdms-clients 0.1.0__py3-none-any.whl → 0.2.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bdms/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.1.0'
21
- __version_tuple__ = version_tuple = (0, 1, 0)
20
+ __version__ = version = '0.2.0rc1'
21
+ __version_tuple__ = version_tuple = (0, 2, 0, 'rc1')
@@ -0,0 +1,435 @@
1
+ """Module for ACADA data ingestion (onsite) into the BDMS system using the IngestionClient.
2
+
3
+ This module provides the IngestionClient class to manage the ingestion of ACADA data into the BDMS system.
4
+ It includes functionality for constructing FITS file paths, converting ACADA paths to Logical File Names (LFNs),
5
+ and registering replicas in Rucio.
6
+ """
7
+ # NOTE: development snapshot - latest working version (as of 6:40 pm), verified with the judge-repairer repairing the STUCK rule.
8
+
9
+ import logging
10
+ import os
11
+ from pathlib import Path
12
+ from typing import Optional, Union
13
+
14
+ from astropy.io import fits
15
+ from rucio.client.accountclient import AccountClient
16
+ from rucio.client.client import Client
17
+ from rucio.client.replicaclient import ReplicaClient
18
+ from rucio.client.rseclient import RSEClient
19
+ from rucio.client.ruleclient import RuleClient
20
+ from rucio.client.scopeclient import ScopeClient
21
+ from rucio.common.exception import Duplicate, RucioException
22
+ from rucio.common.utils import adler32
23
+
24
+ LOGGER = logging.getLogger(__name__)
25
+
26
+
27
+ __all__ = [
28
+ "IngestionClient",
29
+ ]
30
+
31
+
32
+ class IngestionClient:
33
+ """A client for BDMS ingestion and replication.
34
+
35
+ This class provides methods to ingest ACADA data into the BDMS system, including converting ACADA paths to
36
+ Logical File Names (LFNs), registering replicas in Rucio, and replicating data to offsite RSEs.
37
+
38
+ Parameters
39
+ ----------
40
+ data_path : str
41
+ Path to data directory. This is a required argument.
42
+ rse : str
43
+ Rucio Storage Element (RSE) name. This is a required argument.
44
+ vo : str, optional
45
+ Virtual organization name prefix. Defaults to "ctao".
46
+ logger : logging.Logger, optional
47
+ Logger instance. If None, a new logger is created.
48
+ scope : str, optional
49
+ Rucio scope to use for replica registration. Defaults to 'acada'.
50
+
51
+ Raises
52
+ ------
53
+ FileNotFoundError
54
+ If the specified data directory does not exist.
55
+ ValueError
56
+ If the specified RSE is not available in Rucio.
57
+ RuntimeError
58
+ If there is an error communicating with Rucio while:
59
+
60
+ - Checking RSE availability.
61
+ - Initializing Rucio clients (related to configuration and authentication issues).
62
+ - Managing the Rucio scope.
63
+ """
64
+
65
+ def __init__(
66
+ self,
67
+ data_path: Union[str, os.PathLike],
68
+ rse: str,
69
+ vo="ctao",
70
+ logger=None,
71
+ scope="acada",
72
+ ) -> None:
73
+ self.logger = logger or LOGGER.getChild(self.__class__.__name__)
74
+ self.vo = vo
75
+
76
+ # Set data path (Prefix)
77
+ self.data_path = Path(data_path)
78
+ if not self.data_path.is_dir():
79
+ raise FileNotFoundError(f"Data directory not found at {self.data_path}")
80
+
81
+ self.rse = rse
82
+
83
+ # Check RSE availability before proceeding to next steps
84
+ self._check_rse_availability()
85
+
86
+ # Initialize Rucio clients
87
+ try:
88
+ self.client = Client()
89
+ self.replica_client = ReplicaClient()
90
+ self.scope_client = ScopeClient()
91
+ self.account_client = AccountClient()
92
+ self.rse_client = RSEClient()
93
+ self.rule_client = RuleClient()
94
+ except RucioException as e:
95
+ self.logger.error("Failed to initialize Rucio clients: %s", str(e))
96
+ raise
97
+
98
+ # Set the scope and ensure it exists in Rucio
99
+ self.scope = scope
100
+ self.user = self.account_client.whoami()["account"]
101
+ self._add_acada_scope()
102
+
103
+ def _check_rse_availability(self) -> None:
104
+ """Check if the specified RSE is available in Rucio.
105
+
106
+ Raises
107
+ ------
108
+ ValueError
109
+ If the RSE is not found in Rucio.
110
+ rucio.common.exception.RucioException
111
+ If there is an error communicating with Rucio (e.g., network issues, authentication errors).
112
+ """
113
+ rse_client = RSEClient()
114
+ available_rses = [rse["rse"] for rse in rse_client.list_rses()]
115
+ if self.rse not in available_rses:
116
+ raise ValueError(
117
+ f"RSE '{self.rse}' is not available in Rucio. Available RSEs: {available_rses}"
118
+ )
119
+ self.logger.info("RSE '%s' is available in Rucio", self.rse)
120
+
121
+ def _add_acada_scope(self) -> None:
122
+ """Add the specified scope to Rucio if it doesn't already exist.
123
+
124
+ Raises
125
+ ------
126
+ RuntimeError
127
+ If the scope cannot be created or managed in Rucio.
128
+ """
129
+ try:
130
+ self.scope_client.add_scope(self.user, self.scope)
131
+ except Duplicate:
132
+ # Scope already exists
133
+ return
134
+ except RucioException as e:
135
+ self.logger.error(
136
+ "Failed to manage scope '%s' in Rucio: %s",
137
+ self.scope,
138
+ str(e),
139
+ )
140
+ raise
141
+
142
+ def acada_to_lfn(self, acada_path) -> str:
143
+ """Convert an ACADA path to a BDMS Logical File Name (LFN).
144
+
145
+ Parameters
146
+ ----------
147
+ acada_path : str or Path
148
+ The ACADA file path to convert.
149
+
150
+ Returns
151
+ -------
152
+ str
153
+ The generated BDMS LFN (e.g., '/ctao/acada/DL0/LSTN-01/events/YYYY/MM/DD/file.fits.fz').
154
+
155
+ Raises
156
+ ------
157
+ ValueError
158
+ If ``acada_path`` is not an absolute path or is not within the BDMS data path (prefix) or
159
+ does not start with the expected '<vo>/<scope>' prefix under the data path.
160
+ """
161
+ acada_path = Path(acada_path)
162
+
163
+ # Validate that the path is absolute
164
+ if not acada_path.is_absolute():
165
+ raise ValueError("acada_path must be absolute")
166
+
167
+ # Validate that acada_path is within data_path
168
+ try:
169
+ rel_path = acada_path.relative_to(self.data_path)
170
+ except ValueError:
171
+ raise ValueError(
172
+ f"acada_path {acada_path} is not within data_path {self.data_path}"
173
+ )
174
+
175
+ # Validate that acada_path starts with <vo>/<scope> under data_path
176
+ expected_prefix = self.data_path / self.vo / self.scope
177
+ if not acada_path.is_relative_to(expected_prefix):
178
+ raise ValueError(
179
+ f"acada_path {acada_path} must start with {expected_prefix} (vo: {self.vo}, scope: {self.scope})"
180
+ )
181
+
182
+ bdms_lfn = f"/{rel_path}"
183
+ return bdms_lfn
184
+
185
+ def check_replica_exists(self, lfn: str) -> bool:
186
+ """Check if a replica already exists for the given LFN on the specified RSE.
187
+
188
+ Parameters
189
+ ----------
190
+ lfn : str
191
+ The Logical File Name (LFN) to check.
192
+
193
+
194
+ Returns
195
+ -------
196
+ bool
197
+ True if the replica exists and has a valid PFN, False otherwise.
198
+
199
+ Raises
200
+ ------
201
+ RuntimeError
202
+ If a replica exists but has no PFN for the RSE, indicating an invalid replica state.
203
+ """
204
+ replicas = list(
205
+ self.replica_client.list_replicas(
206
+ dids=[{"scope": self.scope, "name": lfn}],
207
+ rse_expression=self.rse,
208
+ )
209
+ )
210
+
211
+ self.logger.debug("Existing Replicas for lfn '%r'", replicas)
212
+ if replicas:
213
+ replica = replicas[0]
214
+ pfns = replica["rses"].get(self.rse, [])
215
+ if not pfns:
216
+ raise RuntimeError(
217
+ f"No PFN found for existing replica with LFN {lfn} on {self.rse}"
218
+ )
219
+ return True
220
+ return False
221
+
222
+ def add_onsite_replica(self, acada_path) -> str:
223
+ """Register a file as a replica in Rucio on the specified RSE and retrieve its LFN.
224
+
225
+ Parameters
226
+ ----------
227
+ acada_path : str or Path
228
+ The ACADA path where the file is located.
229
+
230
+ rse : str, optional
231
+ The RSE to register the replica on. If None, uses the client's RSE (self.rse).
232
+
233
+ Returns
234
+ -------
235
+ str
236
+ The Logical File Name (LFN) of the registered replica.
237
+
238
+ Raises
239
+ ------
240
+ FileNotFoundError
241
+ If the file does not exist at ``acada_path``.
242
+ RuntimeError
243
+ In the following cases:
244
+ - If a replica already exists but has no PFN for the RSE (raised by `check_replica_exists`).
245
+ - If the ``IngestionClient.add_replica`` call fails during registration (e.g., due to a Rucio server issue).
246
+ """
247
+ acada_path = Path(acada_path)
248
+ self.logger.debug("Starting ingestion for path '%s'", acada_path)
249
+
250
+ # Validate file existence
251
+ if not acada_path.is_file():
252
+ raise FileNotFoundError(f"File does not exist at {acada_path}")
253
+
254
+ # Generate LFN
255
+ lfn = self.acada_to_lfn(acada_path=str(acada_path))
256
+ self.logger.info("Using LFN '%s' for path '%s'", lfn, acada_path)
257
+
258
+ # Check if the replica already exists
259
+ if self.check_replica_exists(lfn):
260
+ self.logger.info("Replica already exists for lfn '%s', skipping", lfn)
261
+ return lfn
262
+
263
+ # Proceed with registering the replica if check_replica_exists returns False
264
+
265
+ # Compute file metadata
266
+ # TODO: use functions to identify file type, extract metadata, validate integrity, when this functionality is ready https://gitlab.cta-observatory.org/cta-computing/dpps/bdms/bdms/-/work_items/46
267
+ file_size = acada_path.stat().st_size
268
+ checksum = adler32(acada_path)
269
+
270
+ # Register the replica in Rucio
271
+ try:
272
+ success = self.replica_client.add_replica(
273
+ rse=self.rse,
274
+ scope=self.scope,
275
+ name=lfn,
276
+ bytes_=file_size,
277
+ adler32=checksum,
278
+ )
279
+ if not success:
280
+ raise RuntimeError(
281
+ f"Failed to register replica for LFN {lfn} on {self.rse}"
282
+ )
283
+ except Exception as e:
284
+ raise RuntimeError(
285
+ f"Failed to register replica for LFN {lfn} on {self.rse}: {str(e)}"
286
+ )
287
+ self.logger.info("Successfully registered the replica for lfn '%s'", lfn)
288
+
289
+ return lfn
290
+
291
+ def add_offsite_replication_rules(
292
+ self,
293
+ lfn: str,
294
+ copies: int = 1,
295
+ lifetime: Optional[int] = None,
296
+ offsite_rse_expression: str = "OFFSITE",
297
+ ) -> list[str]:
298
+ """Replicate an already-ingested ACADA data product to offsite RSEs.
299
+
300
+ This method assumes the data product has already been ingested into the onsite RSE and is identified by the given LFN.
301
+ It creates one or two replication rules to offsite RSEs, depending on the number of copies requested:
302
+ - First rule: Always creates exactly 1 replica to prevent parallel transfers from the onsite RSE.
303
+ - Second rule (if copies > 1): Creates additional replicas (equal to the requested copies), sourcing data from offsite RSEs to avoid further transfers from the onsite RSE.
304
+
305
+ Parameters
306
+ ----------
307
+ lfn : str
308
+ The Logical File Name (LFN) of the already-ingested ACADA data product.
309
+ copies : int, optional
310
+ The total number of offsite replicas to create. Defaults to 1.
311
+ - If copies == 1, only one rule is created with 1 replica.
312
+ - If copies > 1, a second rule is created with the requested number of copies, sourcing from offsite RSEs.
313
+ lifetime : int, optional
314
+ The lifetime of the replication rules in seconds. If None, the rules are permanent.
315
+ offsite_rse_expression : str, optional
316
+ The RSE expression identifying offsite Rucio Storage Elements (RSEs). Defaults to "OFFSITE".
317
+
318
+ Returns
319
+ -------
320
+ List[str]
321
+ The list of replication rule IDs created (1 or 2 rules, depending on the copies parameter).
322
+
323
+ Raises
324
+ ------
325
+ RuntimeError
326
+ If there is an error interacting with Rucio, including:
327
+ - Failure to create a new replication rule (e.g., DuplicateRule).
328
+ """
329
+ # Create the DID for replication
330
+ did = {"scope": self.scope, "name": lfn}
331
+ dids = [did]
332
+
333
+ # Initialize the list of rule IDs
334
+ rule_ids = []
335
+
336
+ # First rule: Always create exactly 1 replica to prevent parallel transfers from onsite RSE
337
+ try:
338
+ rule_id_offsite_1 = self.rule_client.add_replication_rule(
339
+ dids=dids,
340
+ rse_expression=offsite_rse_expression,
341
+ copies=1,
342
+ lifetime=lifetime,
343
+ source_replica_expression=None, # Let Rucio choose the source (onsite RSE)
344
+ )[0]
345
+ self.logger.debug(
346
+ "Created first replication rule %s for DID %s to RSE expression '%s' with 1 copy, lifetime %s",
347
+ rule_id_offsite_1,
348
+ did,
349
+ offsite_rse_expression,
350
+ lifetime if lifetime is not None else "permanent",
351
+ )
352
+ rule_ids.append(rule_id_offsite_1)
353
+ except RucioException as e:
354
+ self.logger.error(
355
+ "Failed to create first offsite replication rule for DID %s to RSE expression '%s': %s",
356
+ did,
357
+ offsite_rse_expression,
358
+ str(e),
359
+ )
360
+ raise
361
+
362
+ # Second rule: If more than one copy is requested, create a second rule sourcing from offsite RSEs
363
+ if copies > 1:
364
+ # Exclude the onsite RSE to ensure the data is sourced from an offsite RSE
365
+ # source_replica_expression = f"*\\{onsite_rse}" (we could also consider this expression)
366
+ source_replica_expression = offsite_rse_expression
367
+ self.logger.debug(
368
+ "Creating second offsite replication rule to RSE expression '%s' with %d copies, sourcing from offsite RSEs",
369
+ offsite_rse_expression,
370
+ copies,
371
+ )
372
+ try:
373
+ rule_id_offsite_2 = self.rule_client.add_replication_rule(
374
+ dids=dids,
375
+ rse_expression=offsite_rse_expression,
376
+ copies=copies, # Use requested number of copies
377
+ lifetime=lifetime,
378
+ source_replica_expression=source_replica_expression,
379
+ )[0]
380
+ self.logger.debug(
381
+ "Created second replication rule %s for DID %s to RSE expression '%s' with %d copies, source_replica_expression '%s', lifetime %s",
382
+ rule_id_offsite_2,
383
+ did,
384
+ offsite_rse_expression,
385
+ copies,
386
+ source_replica_expression,
387
+ lifetime if lifetime is not None else "permanent",
388
+ )
389
+ rule_ids.append(rule_id_offsite_2)
390
+ except RucioException as e:
391
+ self.logger.error(
392
+ "Failed to create second offsite replication rule for DID %s to RSE expression '%s': %s",
393
+ did,
394
+ offsite_rse_expression,
395
+ str(e),
396
+ )
397
+ raise
398
+
399
+ self.logger.info(
400
+ "Created %d offsite replication rule(s) for LFN '%s' to RSE expression '%s': %s",
401
+ len(rule_ids),
402
+ lfn,
403
+ offsite_rse_expression,
404
+ rule_ids,
405
+ )
406
+ return rule_ids
407
+
408
+
409
+ class FITSVerificationError(Exception):
410
+ """Raised when a FITS file does not pass verification."""
411
+
412
+
413
+ def verify_fits_checksum(hdul: fits.HDUList):
414
+ """
415
+ Verify all present checksums in the given HDUList.
416
+
417
+ Goes through all HDUs and verifies DATASUM and CHECKSUM if
418
+ present in the given HDU.
419
+
420
+ Verifies DATASUM before CHECKSUM to distinguish failure
421
+ in data section vs. failure in header section.
422
+
423
+ Raises
424
+ ------
425
+ FITSVerificationError: in case any of the checks are not passing
426
+ """
427
+ for pos, hdu in enumerate(hdul):
428
+ name = hdu.name or ""
429
+
430
+ checksum_result = hdu.verify_checksum()
431
+ if checksum_result == 0:
432
+ msg = f"CHECKSUM verification failed for HDU {pos} with name {name!r}"
433
+ raise FITSVerificationError(msg)
434
+ elif checksum_result == 2 and pos != 0: # ignore primary for warning
435
+ LOGGER.warning("No CHECKSUM in HDU %d with name %r", pos, name)
bdms/tests/conftest.py CHANGED
@@ -1,14 +1,34 @@
1
+ import logging
1
2
  import os
2
3
  import subprocess as sp
3
4
  from datetime import datetime
5
+ from pathlib import Path
4
6
  from secrets import token_hex
5
7
 
6
8
  import pytest
7
9
  from rucio.client.scopeclient import ScopeClient
8
10
 
11
+ from bdms.tests.utils import download_test_file
12
+
9
13
  USER_CERT = os.getenv("RUCIO_CFG_CLIENT_CERT", "/opt/rucio/etc/usercert.pem")
10
14
  USER_KEY = os.getenv("RUCIO_CFG_CLIENT_KEY", "/opt/rucio/etc/userkey.pem")
11
15
 
16
+ # Define on-site storage related variables
17
+ STORAGE_MOUNT_PATH = Path(os.getenv("STORAGE_MOUNT_PATH", "/storage-1"))
18
+ STORAGE_PROTOCOL = "root" # e.g., root, davs, gsiftp
19
+ STORAGE_HOSTNAME = "rucio-storage-1" # on-site storage container hostname
20
+
21
+
22
def pytest_configure():
    """Raise the threshold of the "gfal2" logger to WARNING.

    gfal is overly verbose at the info level (its global default), so its
    output is reduced a bit for the test run.
    """
    gfal_logger = logging.getLogger("gfal2")
    gfal_logger.setLevel(logging.WARNING)
25
+
26
+
27
@pytest.fixture(scope="session")
def storage_mount_path():
    """Session-scoped fixture returning the module-level STORAGE_MOUNT_PATH."""
    mount_path = STORAGE_MOUNT_PATH
    return mount_path
31
+
12
32
 
13
33
  @pytest.fixture(scope="session")
14
34
  def test_user():
@@ -51,3 +71,24 @@ def test_scope(test_user):
51
71
  sc = ScopeClient()
52
72
  sc.add_scope(test_user, scope)
53
73
  return scope
74
+
75
+
76
@pytest.fixture(scope="session")
def subarray_test_file():
    """Session-scoped fixture: download a subarray test file and return the result of ``download_test_file``."""
    return download_test_file(
        "acada-small/DL0/ARRAY/ctao-n-acada/acada-adh/triggers/2025/02/04/SUB000_SWAT000_20250204T213405_SBID0000000002000000066_OBSID0000000002000000200_SUBARRAY_CHUNK000.fits.fz"
    )
81
+
82
+
83
@pytest.fixture(scope="session")
def tel_trigger_test_file():
    """Session-scoped fixture: download a telescope trigger test file and return the result of ``download_test_file``."""
    return download_test_file(
        "acada-small/DL0/ARRAY/ctao-n-acada/acada-adh/triggers/2025/02/04/SUB000_SWAT000_20250204T213405_SBID0000000002000000066_OBSID0000000002000000200_TEL_CHUNK000.fits.fz"
    )
88
+
89
+
90
@pytest.fixture(scope="session")
def tel_events_test_file():
    """Session-scoped fixture: download a telescope events test file and return the result of ``download_test_file``."""
    return download_test_file(
        "acada-small/DL0/LSTN-01/ctao-n-acada/acada-adh/events/2025/02/04/TEL001_SDH0000_20250204T213354_SBID0000000002000000066_OBSID0000000002000000200_CHUNK000.fits.fz"
    )