ctao-bdms-clients 0.3.0rc1__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff shows the changes between two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
bdms/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '0.3.0rc1'
- __version_tuple__ = version_tuple = (0, 3, 0, 'rc1')
+ __version__ = version = '0.3.1'
+ __version_tuple__ = version_tuple = (0, 3, 1)
bdms/acada_ingest_cli.py CHANGED
@@ -256,6 +256,7 @@ def setup_logging(log_level, log_file=None):
  logging.getLogger("urllib3").setLevel(logging.WARNING)
  logging.getLogger("requests").setLevel(logging.WARNING)
  logging.getLogger("watchdog").setLevel(logging.WARNING)
+ logging.getLogger("charset_normalizer").setLevel(logging.WARNING)
 
 
  def create_ingestion_client(args) -> IngestionClient:
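The added line follows the module's existing pattern of capping chatty third-party loggers at WARNING; charset_normalizer (pulled in via requests) logs its encoding detection at DEBUG. A minimal sketch of the pattern outside the CLI (the function name and level handling are illustrative, not the package's actual code):

```python
import logging


def quiet_noisy_libraries(level: int = logging.WARNING) -> None:
    """Cap verbose third-party loggers so application logs stay readable."""
    # charset_normalizer emits detailed encoding-detection messages at DEBUG.
    for name in ("urllib3", "requests", "watchdog", "charset_normalizer"):
        logging.getLogger(name).setLevel(level)


logging.basicConfig(level=logging.DEBUG)
quiet_noisy_libraries()
```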
bdms/acada_ingestion.py CHANGED
@@ -13,13 +13,12 @@ import threading
  import time
  from concurrent.futures import Future, ProcessPoolExecutor
  from contextlib import ExitStack
- from enum import Enum
  from functools import partial
  from multiprocessing import cpu_count
  from pathlib import Path
  from queue import Empty, Queue
  from traceback import format_exception
- from typing import Optional, Union
+ from typing import NamedTuple, Optional, Union
 
  from astropy.io import fits
  from filelock import FileLock, Timeout
@@ -42,11 +41,10 @@ from bdms.extract_fits_metadata import (
 
  LOGGER = logging.getLogger(__name__)
 
- __all__ = ["IngestionClient", "FITSVerificationError", "Ingest"]
+ __all__ = ["IngestionClient", "FITSVerificationError", "Ingest", "IngestResult"]
 
  INGEST_RUNNING_MESSAGE = "Another ingestion process is already running"
  DETECTED_NEW_TRIGGER_FILE = "Detected new trigger file"
- INGEST_SUCCESS_MESSAGE = "Successfully ingested"
  TRIGGER_SUFFIX = ".trigger"
 
  # Prometheus Metrics for monitoring
@@ -58,8 +56,15 @@ N_TASKS_PROCESSED = Counter(
  "n_tasks_processed", "Total number of tasks processed by the Ingest daemon"
  )
  TASKS_IN_QUEUE = Gauge("n_tasks_queued", "Current number of queued tasks")
+ BYTES_INGESTED = Counter("bytes_ingested", "Total ingested file size")
 
- IngestStatus = Enum("IngestStatus", [("SUCCESS", 0), ("FAILURE", 1), ("SKIPPED", 2)])
+
+ class IngestResult(NamedTuple):
+     """Result of the ingestion of a single file."""
+
+     lfn: str
+     skipped: bool
+     file_size: int
 
 
  class IngestionClient:
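The hunk above replaces the IngestStatus enum with an IngestResult named tuple, so callers receive the LFN and ingested file size together with the skipped flag. A minimal sketch of how such a result can be consumed (the LFN value and report function are illustrative); because a NamedTuple is still a tuple, positional unpacking keeps working, which the updated tests rely on:

```python
from typing import NamedTuple


class IngestResult(NamedTuple):
    """Result of the ingestion of a single file."""

    lfn: str
    skipped: bool
    file_size: int


def report(result: IngestResult) -> str:
    # Attribute access keeps call sites self-documenting.
    if result.skipped:
        return f"{result.lfn}: replica already existed, nothing ingested"
    return f"{result.lfn}: ingested {result.file_size} bytes"


# Positional unpacking still works for code written against the old tuple return.
lfn, skipped, size = IngestResult(lfn="/example/file.fits", skipped=False, file_size=1024)
print(report(IngestResult(lfn, skipped, size)))
```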
@@ -257,7 +262,7 @@ class IngestionClient:
  return True
  return False
 
- def add_onsite_replica(self, acada_path: Union[str, Path]) -> tuple[str, bool]:
+ def add_onsite_replica(self, acada_path: Union[str, Path]) -> IngestResult:
  """Register a file as a replica in Rucio on the specified RSE and retrieve its LFN.
 
  Parameters
@@ -295,7 +300,7 @@ class IngestionClient:
  # Check if the replica already exists
  if self.check_replica_exists(lfn):
  self.logger.info("Replica already exists for lfn '%s', skipping", lfn)
- return lfn, True  # Indicate the file was skipped
+ return IngestResult(lfn=lfn, skipped=True, file_size=0)
 
  # Proceed with registering the replica if check_replica_exists returns False
  valid, metadata = verify_and_extract_metadata(acada_path)
@@ -328,7 +333,7 @@ class IngestionClient:
  self.did_client.set_metadata_bulk(scope=self.scope, name=lfn, meta=metadata)
  self.logger.info("Set metadata of %r to %r", lfn, metadata)
 
- return lfn, False  # Indicate the file was newly ingested
+ return IngestResult(lfn=lfn, skipped=False, file_size=file_size)
 
  def add_offsite_replication_rules(
  self,
@@ -512,7 +517,7 @@ def verify_and_extract_metadata(fits_path):
 
  def process_file(
  client: IngestionClient, file_path: str, logger=None, copies: int = 2
- ) -> IngestStatus:
+ ) -> IngestResult:
  """Process a single file with IngestionClient, clean up the trigger file, and return the ingestion status.
 
  Parameters
@@ -533,24 +538,17 @@ def process_file(
  - SKIPPED if the file was already ingested.
  """
  logger = logger or LOGGER.getChild("Ingest")
+ result = client.add_onsite_replica(file_path)
+
+ if not result.skipped:
+ client.add_offsite_replication_rules(result.lfn, copies=copies)
+
  trigger_file = Path(file_path + TRIGGER_SUFFIX)
- try:
- lfn, was_skipped = client.add_onsite_replica(file_path)
- if was_skipped:
- logger.info("Replica already exists for %s, skipping", file_path)
- if trigger_file.exists():
- trigger_file.unlink()
- logger.debug("Removed trigger file %s", trigger_file)
- return IngestStatus.SKIPPED
- client.add_offsite_replication_rules(lfn, copies=copies)
- logger.info("%s %s, LFN: %s", INGEST_SUCCESS_MESSAGE, file_path, lfn)
- if trigger_file.exists():
- trigger_file.unlink()
- logger.debug("Removed trigger file %s", trigger_file)
- return IngestStatus.SUCCESS
- except Exception as e:
- logger.exception("Exception in process_file for %s: %s", file_path, str(e))
- return IngestStatus.FAILURE
+ if trigger_file.exists():
+ trigger_file.unlink()
+ logger.debug("Removed trigger file %s", trigger_file)
+
+ return result
 
 
  class TriggerFileHandler(FileSystemEventHandler):
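With the try/except removed, process_file no longer converts errors into a FAILURE status; exceptions raised in the worker now propagate and are observed by the daemon through the future's state (see the result-handling hunk further down). A minimal sketch of that pattern, using a hypothetical task function in place of process_file:

```python
from concurrent.futures import ProcessPoolExecutor
from traceback import format_exception


def ingest_task(path: str) -> str:
    # Hypothetical stand-in for process_file: raises instead of returning a failure status.
    if not path.startswith("/data/"):
        raise ValueError(f"{path} is not within data_path")
    return path


if __name__ == "__main__":
    with ProcessPoolExecutor(max_workers=2) as executor:
        future = executor.submit(ingest_task, "/tmp/outside.fits")
        # Checking cancelled()/exception() before result() avoids re-raising unexpectedly.
        if future.cancelled():
            print("task cancelled")
        elif (exc := future.exception()) is not None:
            print("".join(format_exception(type(exc), exc, exc.__traceback__)))
        else:
            print("ingested:", future.result())
```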
@@ -756,7 +754,7 @@ class Ingest:
  rate = processed_count / elapsed_time if elapsed_time > 0 else 0
 
  # Clean up task tracking
- self.submitted_tasks.pop(task_id, None)
+ path = self.submitted_tasks.pop(task_id, None)
  current_concurrent = len(self.submitted_tasks)
  TASKS_IN_QUEUE.dec()  # Always decrement queue counter
 
@@ -767,30 +765,33 @@ class Ingest:
  )
 
  # Process the result
+ # the order here is important, as the methods on the future object
+ # raise if the future is in the wrong state
  if future.cancelled():
  status = "cancelled"
- N_TASKS_CANCELLED.inc()  # Increment cancellation counter
+ N_TASKS_CANCELLED.inc()
+
  elif (e := future.exception()) is not None:
  self.logger.error(
- "Task %d failed: %s",
+ "Task %d for path %s failed: %s",
  task_id,
+ path,
  "".join(format_exception(type(e), e, e.__traceback__)),
  )
  status = "failed"
- N_TASKS_FAILED.inc()  # Increment failure counter
+ N_TASKS_FAILED.inc()
+
  else:
  result = future.result()
- if result == IngestStatus.SUCCESS:
+ if not result.skipped:
  status = "success"
- N_TASKS_SUCCESS.inc()  # Increment success counter
- elif result == IngestStatus.SKIPPED:
- status = "skipped"
- N_TASKS_SKIPPED.inc()  # Increment skipped counter
+ N_TASKS_SUCCESS.inc()
+ BYTES_INGESTED.inc(result.file_size)
  else:
- status = "failed"
- N_TASKS_FAILED.inc()  # Increment failure counter
+ status = "skipped"
+ N_TASKS_SKIPPED.inc()
 
- N_TASKS_PROCESSED.inc()  # Increment total processed counter
+ N_TASKS_PROCESSED.inc()
 
  # Summary log for all cases
  self.logger.info(
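The new BYTES_INGESTED counter adds ingested data volume to the existing per-task counters. A minimal sketch of the prometheus_client usage involved (metric names match the diff; the help strings, port, and loop are illustrative):

```python
from prometheus_client import Counter, Gauge, start_http_server

N_TASKS_SUCCESS = Counter("n_tasks_success", "Tasks that ingested a new replica")
BYTES_INGESTED = Counter("bytes_ingested", "Total ingested file size")
TASKS_IN_QUEUE = Gauge("n_tasks_queued", "Current number of queued tasks")

if __name__ == "__main__":
    start_http_server(8000)  # exposes the metrics on http://localhost:8000/metrics
    for file_size in (1024, 2048, 4096):
        TASKS_IN_QUEUE.inc()           # task submitted
        # ... ingestion would happen here ...
        TASKS_IN_QUEUE.dec()           # task completed
        N_TASKS_SUCCESS.inc()          # count the successful ingestion
        BYTES_INGESTED.inc(file_size)  # Counter.inc() accepts an increment amount
```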
@@ -896,8 +897,8 @@ class Ingest:
  result_thread.start()
  self.logger.info("Started result processing thread")
 
- with ProcessPoolExecutor(max_workers=self.num_workers) as executor:
- self.executor = executor  # Store reference for submit_file method
+ self.executor = ProcessPoolExecutor(max_workers=self.num_workers)
+ with self.executor:
  self.logger.info(
  "Started process pool with %d workers", self.num_workers
  )
bdms/tests/test_acada_ingestion.py CHANGED
@@ -29,11 +29,9 @@ from watchdog.events import FileMovedEvent
 
  from bdms.acada_ingestion import (
  DETECTED_NEW_TRIGGER_FILE,
- INGEST_SUCCESS_MESSAGE,
  TRIGGER_SUFFIX,
  Ingest,
  IngestionClient,
- IngestStatus,
  TriggerFileHandler,
  process_file,
  )
@@ -238,7 +236,8 @@ def test_add_onsite_replica_with_minio_fits_file(
  reset_xrootd_permissions(storage_mount_path)
 
  # Use add_onsite_replica to register the replica
- lfn, skipped = ingestion_client.add_onsite_replica(acada_path=acada_path)
+ lfn, skipped, size = ingestion_client.add_onsite_replica(acada_path=acada_path)
+ assert size == os.stat(acada_path).st_size
 
  # Verify the LFN matches the expected LFN
  expected_lfn = ingestion_client.acada_to_lfn(acada_path)
@@ -267,11 +266,12 @@ def test_add_onsite_replica_with_minio_fits_file(
 
  # Check for don't ingest again if its already registered
  caplog.clear()
- lfn_check, skipped_check = ingestion_client.add_onsite_replica(
+ lfn_check, skipped_check, size = ingestion_client.add_onsite_replica(
  acada_path=acada_path
  )
  msg = f"LFN mismatch on second ingestion attempt: expected {lfn}, got {lfn_check}"
  assert lfn_check == lfn, msg
+ assert size == 0, "Expected size 0 for skipped file"
 
  msg = (
  "Expected the file to be skipped on second ingestion, but it was ingested again"
@@ -672,14 +672,17 @@ def test_process_file_success(
  scope=test_scope,
  )
 
- acada_path, _ = onsite_test_file
+ acada_path, test_file_content = onsite_test_file
  test_file = acada_path
  trigger_file = Path(str(test_file) + TRIGGER_SUFFIX)
  trigger_file.symlink_to(test_file)
  result = process_file(ingestion_client, str(test_file))
- assert result == IngestStatus.SUCCESS
+
+ assert result.file_size == len(test_file_content)
+ assert not result.skipped
  assert not trigger_file.exists()
- assert INGEST_SUCCESS_MESSAGE in caplog.text
+ assert "Successfully registered the replica for lfn" in caplog.text
+ assert "Created 2 offsite replication rule(s) for LFN" in caplog.text
 
 
  @pytest.mark.usefixtures("_auth_proxy")
@@ -694,19 +697,26 @@ def test_process_file_skipped(
  scope=test_scope,
  )
 
- acada_path, _ = onsite_test_file
+ acada_path, test_file_content = onsite_test_file
  test_file = acada_path
  trigger_file = Path(str(test_file) + TRIGGER_SUFFIX)
  trigger_file.symlink_to(test_file)
- process_file(ingestion_client, str(test_file))
+
+ # process file for the first time
+ result = process_file(ingestion_client, str(test_file))
+ assert not result.skipped
+ assert result.file_size == len(test_file_content)
+
  caplog.clear()
+ # process file second time to verify it is skipped
  result = process_file(ingestion_client, str(test_file))
- assert result == IngestStatus.SKIPPED
+ assert result.skipped
+ assert result.file_size == 0
  assert "Replica already exists" in caplog.text
 
 
  @pytest.mark.usefixtures("_auth_proxy")
- def test_process_file_failure(storage_mount_path, caplog, tmp_path):
+ def test_process_file_failure(storage_mount_path, tmp_path):
  """Test for checking failure for invalid file paths"""
  ingestion_client = IngestionClient(
  data_path=storage_mount_path,
@@ -721,18 +731,8 @@ def test_process_file_failure(storage_mount_path, caplog, tmp_path):
  trigger_file.symlink_to(invalid_file)
 
  # The file path is outside the data_path causing a ValueError in acada_to_lfn
- result = process_file(ingestion_client, str(invalid_file))
-
- # Verify the function returns FAILURE status instead of raising an exception
- assert result == IngestStatus.FAILURE
-
- # Check for the actual error message that gets logged
- assert "Exception in process_file" in caplog.text
- # Verify the file path is in the error message
- assert str(invalid_file) in caplog.text
-
- # Verify that no success message was logged
- assert INGEST_SUCCESS_MESSAGE not in caplog.text
+ with pytest.raises(ValueError, match="is not within data_path"):
+ process_file(ingestion_client, str(invalid_file))
 
  # Trigger file should still exist since ingestion failed
  msg = "Trigger file should not be removed when ingestion fails"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ctao-bdms-clients
- Version: 0.3.0rc1
+ Version: 0.3.1
  Summary: Client module for the CTAO DPPS Bulk Data Management System
  Author-email: Georgios Zacharis <georgios.zacharis@inaf.it>, Stefano Gallozzi <Stefano.gallozzi@inaf.it>, Michele Mastropietro <michele.mastropietro@inaf.it>, Syed Anwar Ul Hasan <syedanwarul.hasan@cta-consortium.org>, Maximilian Linhoff <maximilian.linhoff@cta-observatory.org>, Volodymyr Savchenko <Volodymyr.Savchenko@epfl.ch>
  License-Expression: BSD-3-Clause
@@ -1,13 +1,13 @@
  bdms/__init__.py,sha256=7btE6tNhFqXSv2eUhZ-0m1J3nTTs4Xo6HWcQI4eh5Do,142
- bdms/_version.py,sha256=ymwdyKB404aMzKXrx7y01ltePvHS_nIJjfh_zAIIN44,521
- bdms/acada_ingest_cli.py,sha256=xkf9nT5Lk7SjcbxVeBpKJWuJ-8Luze5-MSq4yki-7_k,12866
- bdms/acada_ingestion.py,sha256=mB5ilvzJbPblFp94Jcca-IzYvrMuQlroDZxuujpFB_I,36373
+ bdms/_version.py,sha256=lOWWIGJeBi0KkFopWU_n3GH71C1PsaZ-ZYDfxFkne6c,511
+ bdms/acada_ingest_cli.py,sha256=_AksS4NFhG6SsHbhjFnO-pgEKsIuTDFX7m_iraK-C7A,12936
+ bdms/acada_ingestion.py,sha256=Z44s9meJC2OSJ-pL0cxHiDHmwF33xYNaTbwcqKq00VE,35782
  bdms/extract_fits_metadata.py,sha256=ZGJQCFJCXkWg8N3CAb17GB-wwPj-wTvNg0JOS-MemZ0,3431
  bdms/version.py,sha256=mTfi1WzbIs991NyImM6mcMg1R39a6U1W2pKnk-Tt5Vw,765
  bdms/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  bdms/tests/conftest.py,sha256=n7KN9foojCXDxFuZinI0MvhnSvLk5Mn7aFmjQKmO8eI,7364
  bdms/tests/test_acada_ingest_cli.py,sha256=SYVt1xlEDsrbPX0C5Isf0thjUcaxr7cjflyZSwpPBaw,8314
- bdms/tests/test_acada_ingestion.py,sha256=xQN07Qbx00IW_w0vCcR5r5H3qvvl_JNYmCUuWJX9xrc,59485
+ bdms/tests/test_acada_ingestion.py,sha256=sJV7m_rzNTx2J7Zz5twArj7ME6Os4KiGMK7OI5Fm5Ko,59554
  bdms/tests/test_basic_rucio_functionality.py,sha256=9GIX8IO6wBJm40LKFEH2StS-fMKvC07sxFHPVR7dftU,3583
  bdms/tests/test_dpps_rel_0_0.py,sha256=2NhxpdhXQg_8lmK-tRrPQ_FcijsIEfv07x-kVlT8Zik,3138
  bdms/tests/test_extract_fits_metadata.py,sha256=A935WD2TF3lBcaeDmzGSlH2IXUF1v8qslrsW30lnEAA,3490
@@ -15,9 +15,9 @@ bdms/tests/test_file_replicas.py,sha256=NqutrSJa5ME50JpmyATNPSLqq1AOq1ruv84XSY3P
  bdms/tests/test_metadata.py,sha256=f0tSqNGlYe-ydoSDJw0k1De2kHoPl6g-GYBj_jP6kCY,3728
  bdms/tests/test_onsite_storage.py,sha256=waK7t9kBquzJbuLLYcpeNU9YuA70XTRS88RMxBWxawI,3765
  bdms/tests/utils.py,sha256=PUayWe60JGVDs5mkWmHVjFV_yqg5XUQlxoAvhT1P0OM,4101
- ctao_bdms_clients-0.3.0rc1.dist-info/licenses/LICENSE,sha256=Py9riZY_f0CmXbrZ5JreE3WgglyWkRnwUfqydvX6jxE,1556
- ctao_bdms_clients-0.3.0rc1.dist-info/METADATA,sha256=NRkliF-xYd9V8Vfkdgj2MEFQI9ee67wmUVKxhFD9tMo,2517
- ctao_bdms_clients-0.3.0rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- ctao_bdms_clients-0.3.0rc1.dist-info/entry_points.txt,sha256=YZCIOePi_xXaJunA6lAQxAKh1tn3wOd4pmqymFRvah4,60
- ctao_bdms_clients-0.3.0rc1.dist-info/top_level.txt,sha256=ao0U8aA33KRHpcqmr7yrK8y2AQ6ahSu514tfaN4hDV8,5
- ctao_bdms_clients-0.3.0rc1.dist-info/RECORD,,
+ ctao_bdms_clients-0.3.1.dist-info/licenses/LICENSE,sha256=Py9riZY_f0CmXbrZ5JreE3WgglyWkRnwUfqydvX6jxE,1556
+ ctao_bdms_clients-0.3.1.dist-info/METADATA,sha256=U8xOFh-YkxRI9hqHftAvdDji4UbCPCPvsFwVLEUfYGo,2514
+ ctao_bdms_clients-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ ctao_bdms_clients-0.3.1.dist-info/entry_points.txt,sha256=YZCIOePi_xXaJunA6lAQxAKh1tn3wOd4pmqymFRvah4,60
+ ctao_bdms_clients-0.3.1.dist-info/top_level.txt,sha256=ao0U8aA33KRHpcqmr7yrK8y2AQ6ahSu514tfaN4hDV8,5
+ ctao_bdms_clients-0.3.1.dist-info/RECORD,,