ctao-bdms-clients 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- bdms/_version.py +2 -2
- bdms/acada_ingest_cli.py +1 -0
- bdms/acada_ingestion.py +41 -40
- bdms/tests/test_acada_ingestion.py +23 -23
- {ctao_bdms_clients-0.3.0.dist-info → ctao_bdms_clients-0.3.1.dist-info}/METADATA +1 -1
- {ctao_bdms_clients-0.3.0.dist-info → ctao_bdms_clients-0.3.1.dist-info}/RECORD +10 -10
- {ctao_bdms_clients-0.3.0.dist-info → ctao_bdms_clients-0.3.1.dist-info}/WHEEL +0 -0
- {ctao_bdms_clients-0.3.0.dist-info → ctao_bdms_clients-0.3.1.dist-info}/entry_points.txt +0 -0
- {ctao_bdms_clients-0.3.0.dist-info → ctao_bdms_clients-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {ctao_bdms_clients-0.3.0.dist-info → ctao_bdms_clients-0.3.1.dist-info}/top_level.txt +0 -0
bdms/_version.py
CHANGED
bdms/acada_ingest_cli.py
CHANGED
@@ -256,6 +256,7 @@ def setup_logging(log_level, log_file=None):
|
|
256
256
|
logging.getLogger("urllib3").setLevel(logging.WARNING)
|
257
257
|
logging.getLogger("requests").setLevel(logging.WARNING)
|
258
258
|
logging.getLogger("watchdog").setLevel(logging.WARNING)
|
259
|
+
logging.getLogger("charset_normalizer").setLevel(logging.WARNING)
|
259
260
|
|
260
261
|
|
261
262
|
def create_ingestion_client(args) -> IngestionClient:
|
bdms/acada_ingestion.py
CHANGED
@@ -13,13 +13,12 @@ import threading
|
|
13
13
|
import time
|
14
14
|
from concurrent.futures import Future, ProcessPoolExecutor
|
15
15
|
from contextlib import ExitStack
|
16
|
-
from enum import Enum
|
17
16
|
from functools import partial
|
18
17
|
from multiprocessing import cpu_count
|
19
18
|
from pathlib import Path
|
20
19
|
from queue import Empty, Queue
|
21
20
|
from traceback import format_exception
|
22
|
-
from typing import Optional, Union
|
21
|
+
from typing import NamedTuple, Optional, Union
|
23
22
|
|
24
23
|
from astropy.io import fits
|
25
24
|
from filelock import FileLock, Timeout
|
@@ -42,11 +41,10 @@ from bdms.extract_fits_metadata import (
|
|
42
41
|
|
43
42
|
LOGGER = logging.getLogger(__name__)
|
44
43
|
|
45
|
-
__all__ = ["IngestionClient", "FITSVerificationError", "Ingest"]
|
44
|
+
__all__ = ["IngestionClient", "FITSVerificationError", "Ingest", "IngestResult"]
|
46
45
|
|
47
46
|
INGEST_RUNNING_MESSAGE = "Another ingestion process is already running"
|
48
47
|
DETECTED_NEW_TRIGGER_FILE = "Detected new trigger file"
|
49
|
-
INGEST_SUCCESS_MESSAGE = "Successfully ingested"
|
50
48
|
TRIGGER_SUFFIX = ".trigger"
|
51
49
|
|
52
50
|
# Prometheus Metrics for monitoring
|
@@ -58,8 +56,15 @@ N_TASKS_PROCESSED = Counter(
|
|
58
56
|
"n_tasks_processed", "Total number of tasks processed by the Ingest daemon"
|
59
57
|
)
|
60
58
|
TASKS_IN_QUEUE = Gauge("n_tasks_queued", "Current number of queued tasks")
|
59
|
+
BYTES_INGESTED = Counter("bytes_ingested", "Total ingested file size")
|
61
60
|
|
62
|
-
|
61
|
+
|
62
|
+
class IngestResult(NamedTuple):
|
63
|
+
"""Result of the ingestion of a single file."""
|
64
|
+
|
65
|
+
lfn: str
|
66
|
+
skipped: bool
|
67
|
+
file_size: int
|
63
68
|
|
64
69
|
|
65
70
|
class IngestionClient:
|
@@ -257,7 +262,7 @@ class IngestionClient:
|
|
257
262
|
return True
|
258
263
|
return False
|
259
264
|
|
260
|
-
def add_onsite_replica(self, acada_path: Union[str, Path]) ->
|
265
|
+
def add_onsite_replica(self, acada_path: Union[str, Path]) -> IngestResult:
|
261
266
|
"""Register a file as a replica in Rucio on the specified RSE and retrieve its LFN.
|
262
267
|
|
263
268
|
Parameters
|
@@ -295,7 +300,7 @@ class IngestionClient:
|
|
295
300
|
# Check if the replica already exists
|
296
301
|
if self.check_replica_exists(lfn):
|
297
302
|
self.logger.info("Replica already exists for lfn '%s', skipping", lfn)
|
298
|
-
return lfn, True
|
303
|
+
return IngestResult(lfn=lfn, skipped=True, file_size=0)
|
299
304
|
|
300
305
|
# Proceed with registering the replica if check_replica_exists returns False
|
301
306
|
valid, metadata = verify_and_extract_metadata(acada_path)
|
@@ -328,7 +333,7 @@ class IngestionClient:
|
|
328
333
|
self.did_client.set_metadata_bulk(scope=self.scope, name=lfn, meta=metadata)
|
329
334
|
self.logger.info("Set metadata of %r to %r", lfn, metadata)
|
330
335
|
|
331
|
-
return lfn, False
|
336
|
+
return IngestResult(lfn=lfn, skipped=False, file_size=file_size)
|
332
337
|
|
333
338
|
def add_offsite_replication_rules(
|
334
339
|
self,
|
@@ -512,7 +517,7 @@ def verify_and_extract_metadata(fits_path):
|
|
512
517
|
|
513
518
|
def process_file(
|
514
519
|
client: IngestionClient, file_path: str, logger=None, copies: int = 2
|
515
|
-
) ->
|
520
|
+
) -> IngestResult:
|
516
521
|
"""Process a single file with IngestionClient, clean up the trigger file, and return the ingestion status.
|
517
522
|
|
518
523
|
Parameters
|
@@ -533,24 +538,17 @@ def process_file(
|
|
533
538
|
- SKIPPED if the file was already ingested.
|
534
539
|
"""
|
535
540
|
logger = logger or LOGGER.getChild("Ingest")
|
541
|
+
result = client.add_onsite_replica(file_path)
|
542
|
+
|
543
|
+
if not result.skipped:
|
544
|
+
client.add_offsite_replication_rules(result.lfn, copies=copies)
|
545
|
+
|
536
546
|
trigger_file = Path(file_path + TRIGGER_SUFFIX)
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
trigger_file.unlink()
|
543
|
-
logger.debug("Removed trigger file %s", trigger_file)
|
544
|
-
return IngestStatus.SKIPPED
|
545
|
-
client.add_offsite_replication_rules(lfn, copies=copies)
|
546
|
-
logger.info("%s %s, LFN: %s", INGEST_SUCCESS_MESSAGE, file_path, lfn)
|
547
|
-
if trigger_file.exists():
|
548
|
-
trigger_file.unlink()
|
549
|
-
logger.debug("Removed trigger file %s", trigger_file)
|
550
|
-
return IngestStatus.SUCCESS
|
551
|
-
except Exception as e:
|
552
|
-
logger.exception("Exception in process_file for %s: %s", file_path, str(e))
|
553
|
-
return IngestStatus.FAILURE
|
547
|
+
if trigger_file.exists():
|
548
|
+
trigger_file.unlink()
|
549
|
+
logger.debug("Removed trigger file %s", trigger_file)
|
550
|
+
|
551
|
+
return result
|
554
552
|
|
555
553
|
|
556
554
|
class TriggerFileHandler(FileSystemEventHandler):
|
@@ -756,7 +754,7 @@ class Ingest:
|
|
756
754
|
rate = processed_count / elapsed_time if elapsed_time > 0 else 0
|
757
755
|
|
758
756
|
# Clean up task tracking
|
759
|
-
self.submitted_tasks.pop(task_id, None)
|
757
|
+
path = self.submitted_tasks.pop(task_id, None)
|
760
758
|
current_concurrent = len(self.submitted_tasks)
|
761
759
|
TASKS_IN_QUEUE.dec() # Always decrement queue counter
|
762
760
|
|
@@ -767,30 +765,33 @@ class Ingest:
|
|
767
765
|
)
|
768
766
|
|
769
767
|
# Process the result
|
768
|
+
# the order here is important, as the methods on the future object
|
769
|
+
# raise if the future is in the wrong state
|
770
770
|
if future.cancelled():
|
771
771
|
status = "cancelled"
|
772
|
-
N_TASKS_CANCELLED.inc()
|
772
|
+
N_TASKS_CANCELLED.inc()
|
773
|
+
|
773
774
|
elif (e := future.exception()) is not None:
|
774
775
|
self.logger.error(
|
775
|
-
"Task %d failed: %s",
|
776
|
+
"Task %d for path %s failed: %s",
|
776
777
|
task_id,
|
778
|
+
path,
|
777
779
|
"".join(format_exception(type(e), e, e.__traceback__)),
|
778
780
|
)
|
779
781
|
status = "failed"
|
780
|
-
N_TASKS_FAILED.inc()
|
782
|
+
N_TASKS_FAILED.inc()
|
783
|
+
|
781
784
|
else:
|
782
785
|
result = future.result()
|
783
|
-
if result
|
786
|
+
if not result.skipped:
|
784
787
|
status = "success"
|
785
|
-
N_TASKS_SUCCESS.inc()
|
786
|
-
|
787
|
-
status = "skipped"
|
788
|
-
N_TASKS_SKIPPED.inc() # Increment skipped counter
|
788
|
+
N_TASKS_SUCCESS.inc()
|
789
|
+
BYTES_INGESTED.inc(result.file_size)
|
789
790
|
else:
|
790
|
-
status = "
|
791
|
-
|
791
|
+
status = "skipped"
|
792
|
+
N_TASKS_SKIPPED.inc()
|
792
793
|
|
793
|
-
N_TASKS_PROCESSED.inc()
|
794
|
+
N_TASKS_PROCESSED.inc()
|
794
795
|
|
795
796
|
# Summary log for all cases
|
796
797
|
self.logger.info(
|
@@ -896,8 +897,8 @@ class Ingest:
|
|
896
897
|
result_thread.start()
|
897
898
|
self.logger.info("Started result processing thread")
|
898
899
|
|
899
|
-
|
900
|
-
|
900
|
+
self.executor = ProcessPoolExecutor(max_workers=self.num_workers)
|
901
|
+
with self.executor:
|
901
902
|
self.logger.info(
|
902
903
|
"Started process pool with %d workers", self.num_workers
|
903
904
|
)
|
@@ -29,11 +29,9 @@ from watchdog.events import FileMovedEvent
|
|
29
29
|
|
30
30
|
from bdms.acada_ingestion import (
|
31
31
|
DETECTED_NEW_TRIGGER_FILE,
|
32
|
-
INGEST_SUCCESS_MESSAGE,
|
33
32
|
TRIGGER_SUFFIX,
|
34
33
|
Ingest,
|
35
34
|
IngestionClient,
|
36
|
-
IngestStatus,
|
37
35
|
TriggerFileHandler,
|
38
36
|
process_file,
|
39
37
|
)
|
@@ -238,7 +236,8 @@ def test_add_onsite_replica_with_minio_fits_file(
|
|
238
236
|
reset_xrootd_permissions(storage_mount_path)
|
239
237
|
|
240
238
|
# Use add_onsite_replica to register the replica
|
241
|
-
lfn, skipped = ingestion_client.add_onsite_replica(acada_path=acada_path)
|
239
|
+
lfn, skipped, size = ingestion_client.add_onsite_replica(acada_path=acada_path)
|
240
|
+
assert size == os.stat(acada_path).st_size
|
242
241
|
|
243
242
|
# Verify the LFN matches the expected LFN
|
244
243
|
expected_lfn = ingestion_client.acada_to_lfn(acada_path)
|
@@ -267,11 +266,12 @@ def test_add_onsite_replica_with_minio_fits_file(
|
|
267
266
|
|
268
267
|
# Check for don't ingest again if its already registered
|
269
268
|
caplog.clear()
|
270
|
-
lfn_check, skipped_check = ingestion_client.add_onsite_replica(
|
269
|
+
lfn_check, skipped_check, size = ingestion_client.add_onsite_replica(
|
271
270
|
acada_path=acada_path
|
272
271
|
)
|
273
272
|
msg = f"LFN mismatch on second ingestion attempt: expected {lfn}, got {lfn_check}"
|
274
273
|
assert lfn_check == lfn, msg
|
274
|
+
assert size == 0, "Expected size 0 for skipped file"
|
275
275
|
|
276
276
|
msg = (
|
277
277
|
"Expected the file to be skipped on second ingestion, but it was ingested again"
|
@@ -672,14 +672,17 @@ def test_process_file_success(
|
|
672
672
|
scope=test_scope,
|
673
673
|
)
|
674
674
|
|
675
|
-
acada_path,
|
675
|
+
acada_path, test_file_content = onsite_test_file
|
676
676
|
test_file = acada_path
|
677
677
|
trigger_file = Path(str(test_file) + TRIGGER_SUFFIX)
|
678
678
|
trigger_file.symlink_to(test_file)
|
679
679
|
result = process_file(ingestion_client, str(test_file))
|
680
|
-
|
680
|
+
|
681
|
+
assert result.file_size == len(test_file_content)
|
682
|
+
assert not result.skipped
|
681
683
|
assert not trigger_file.exists()
|
682
|
-
assert
|
684
|
+
assert "Successfully registered the replica for lfn" in caplog.text
|
685
|
+
assert "Created 2 offsite replication rule(s) for LFN" in caplog.text
|
683
686
|
|
684
687
|
|
685
688
|
@pytest.mark.usefixtures("_auth_proxy")
|
@@ -694,19 +697,26 @@ def test_process_file_skipped(
|
|
694
697
|
scope=test_scope,
|
695
698
|
)
|
696
699
|
|
697
|
-
acada_path,
|
700
|
+
acada_path, test_file_content = onsite_test_file
|
698
701
|
test_file = acada_path
|
699
702
|
trigger_file = Path(str(test_file) + TRIGGER_SUFFIX)
|
700
703
|
trigger_file.symlink_to(test_file)
|
701
|
-
|
704
|
+
|
705
|
+
# process file for the first time
|
706
|
+
result = process_file(ingestion_client, str(test_file))
|
707
|
+
assert not result.skipped
|
708
|
+
assert result.file_size == len(test_file_content)
|
709
|
+
|
702
710
|
caplog.clear()
|
711
|
+
# process file second time to verify it is skipped
|
703
712
|
result = process_file(ingestion_client, str(test_file))
|
704
|
-
assert result
|
713
|
+
assert result.skipped
|
714
|
+
assert result.file_size == 0
|
705
715
|
assert "Replica already exists" in caplog.text
|
706
716
|
|
707
717
|
|
708
718
|
@pytest.mark.usefixtures("_auth_proxy")
|
709
|
-
def test_process_file_failure(storage_mount_path,
|
719
|
+
def test_process_file_failure(storage_mount_path, tmp_path):
|
710
720
|
"""Test for checking failure for invalid file paths"""
|
711
721
|
ingestion_client = IngestionClient(
|
712
722
|
data_path=storage_mount_path,
|
@@ -721,18 +731,8 @@ def test_process_file_failure(storage_mount_path, caplog, tmp_path):
|
|
721
731
|
trigger_file.symlink_to(invalid_file)
|
722
732
|
|
723
733
|
# The file path is outside the data_path causing a ValueError in acada_to_lfn
|
724
|
-
|
725
|
-
|
726
|
-
# Verify the function returns FAILURE status instead of raising an exception
|
727
|
-
assert result == IngestStatus.FAILURE
|
728
|
-
|
729
|
-
# Check for the actual error message that gets logged
|
730
|
-
assert "Exception in process_file" in caplog.text
|
731
|
-
# Verify the file path is in the error message
|
732
|
-
assert str(invalid_file) in caplog.text
|
733
|
-
|
734
|
-
# Verify that no success message was logged
|
735
|
-
assert INGEST_SUCCESS_MESSAGE not in caplog.text
|
734
|
+
with pytest.raises(ValueError, match="is not within data_path"):
|
735
|
+
process_file(ingestion_client, str(invalid_file))
|
736
736
|
|
737
737
|
# Trigger file should still exist since ingestion failed
|
738
738
|
msg = "Trigger file should not be removed when ingestion fails"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: ctao-bdms-clients
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.1
|
4
4
|
Summary: Client module for the CTAO DPPS Bulk Data Management System
|
5
5
|
Author-email: Georgios Zacharis <georgios.zacharis@inaf.it>, Stefano Gallozzi <Stefano.gallozzi@inaf.it>, Michele Mastropietro <michele.mastropietro@inaf.it>, Syed Anwar Ul Hasan <syedanwarul.hasan@cta-consortium.org>, Maximilian Linhoff <maximilian.linhoff@cta-observatory.org>, Volodymyr Savchenko <Volodymyr.Savchenko@epfl.ch>
|
6
6
|
License-Expression: BSD-3-Clause
|
@@ -1,13 +1,13 @@
|
|
1
1
|
bdms/__init__.py,sha256=7btE6tNhFqXSv2eUhZ-0m1J3nTTs4Xo6HWcQI4eh5Do,142
|
2
|
-
bdms/_version.py,sha256=
|
3
|
-
bdms/acada_ingest_cli.py,sha256=
|
4
|
-
bdms/acada_ingestion.py,sha256=
|
2
|
+
bdms/_version.py,sha256=lOWWIGJeBi0KkFopWU_n3GH71C1PsaZ-ZYDfxFkne6c,511
|
3
|
+
bdms/acada_ingest_cli.py,sha256=_AksS4NFhG6SsHbhjFnO-pgEKsIuTDFX7m_iraK-C7A,12936
|
4
|
+
bdms/acada_ingestion.py,sha256=Z44s9meJC2OSJ-pL0cxHiDHmwF33xYNaTbwcqKq00VE,35782
|
5
5
|
bdms/extract_fits_metadata.py,sha256=ZGJQCFJCXkWg8N3CAb17GB-wwPj-wTvNg0JOS-MemZ0,3431
|
6
6
|
bdms/version.py,sha256=mTfi1WzbIs991NyImM6mcMg1R39a6U1W2pKnk-Tt5Vw,765
|
7
7
|
bdms/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
bdms/tests/conftest.py,sha256=n7KN9foojCXDxFuZinI0MvhnSvLk5Mn7aFmjQKmO8eI,7364
|
9
9
|
bdms/tests/test_acada_ingest_cli.py,sha256=SYVt1xlEDsrbPX0C5Isf0thjUcaxr7cjflyZSwpPBaw,8314
|
10
|
-
bdms/tests/test_acada_ingestion.py,sha256=
|
10
|
+
bdms/tests/test_acada_ingestion.py,sha256=sJV7m_rzNTx2J7Zz5twArj7ME6Os4KiGMK7OI5Fm5Ko,59554
|
11
11
|
bdms/tests/test_basic_rucio_functionality.py,sha256=9GIX8IO6wBJm40LKFEH2StS-fMKvC07sxFHPVR7dftU,3583
|
12
12
|
bdms/tests/test_dpps_rel_0_0.py,sha256=2NhxpdhXQg_8lmK-tRrPQ_FcijsIEfv07x-kVlT8Zik,3138
|
13
13
|
bdms/tests/test_extract_fits_metadata.py,sha256=A935WD2TF3lBcaeDmzGSlH2IXUF1v8qslrsW30lnEAA,3490
|
@@ -15,9 +15,9 @@ bdms/tests/test_file_replicas.py,sha256=NqutrSJa5ME50JpmyATNPSLqq1AOq1ruv84XSY3P
|
|
15
15
|
bdms/tests/test_metadata.py,sha256=f0tSqNGlYe-ydoSDJw0k1De2kHoPl6g-GYBj_jP6kCY,3728
|
16
16
|
bdms/tests/test_onsite_storage.py,sha256=waK7t9kBquzJbuLLYcpeNU9YuA70XTRS88RMxBWxawI,3765
|
17
17
|
bdms/tests/utils.py,sha256=PUayWe60JGVDs5mkWmHVjFV_yqg5XUQlxoAvhT1P0OM,4101
|
18
|
-
ctao_bdms_clients-0.3.
|
19
|
-
ctao_bdms_clients-0.3.
|
20
|
-
ctao_bdms_clients-0.3.
|
21
|
-
ctao_bdms_clients-0.3.
|
22
|
-
ctao_bdms_clients-0.3.
|
23
|
-
ctao_bdms_clients-0.3.
|
18
|
+
ctao_bdms_clients-0.3.1.dist-info/licenses/LICENSE,sha256=Py9riZY_f0CmXbrZ5JreE3WgglyWkRnwUfqydvX6jxE,1556
|
19
|
+
ctao_bdms_clients-0.3.1.dist-info/METADATA,sha256=U8xOFh-YkxRI9hqHftAvdDji4UbCPCPvsFwVLEUfYGo,2514
|
20
|
+
ctao_bdms_clients-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
21
|
+
ctao_bdms_clients-0.3.1.dist-info/entry_points.txt,sha256=YZCIOePi_xXaJunA6lAQxAKh1tn3wOd4pmqymFRvah4,60
|
22
|
+
ctao_bdms_clients-0.3.1.dist-info/top_level.txt,sha256=ao0U8aA33KRHpcqmr7yrK8y2AQ6ahSu514tfaN4hDV8,5
|
23
|
+
ctao_bdms_clients-0.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|