dirac-cwl 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dirac_cwl/__init__.py +28 -0
- dirac_cwl/commands/__init__.py +5 -0
- dirac_cwl/commands/core.py +37 -0
- dirac_cwl/commands/download_config.py +22 -0
- dirac_cwl/commands/group_outputs.py +32 -0
- dirac_cwl/core/__init__.py +1 -0
- dirac_cwl/core/exceptions.py +5 -0
- dirac_cwl/core/utility.py +41 -0
- dirac_cwl/data_management_mocks/data_manager.py +99 -0
- dirac_cwl/data_management_mocks/file_catalog.py +132 -0
- dirac_cwl/data_management_mocks/sandbox.py +89 -0
- dirac_cwl/execution_hooks/__init__.py +40 -0
- dirac_cwl/execution_hooks/core.py +342 -0
- dirac_cwl/execution_hooks/plugins/__init__.py +16 -0
- dirac_cwl/execution_hooks/plugins/core.py +58 -0
- dirac_cwl/execution_hooks/registry.py +209 -0
- dirac_cwl/job/__init__.py +249 -0
- dirac_cwl/job/job_wrapper.py +375 -0
- dirac_cwl/job/job_wrapper_template.py +56 -0
- dirac_cwl/job/submission_clients.py +166 -0
- dirac_cwl/modules/crypto.py +96 -0
- dirac_cwl/modules/pi_gather.py +41 -0
- dirac_cwl/modules/pi_simulate.py +33 -0
- dirac_cwl/production/__init__.py +200 -0
- dirac_cwl/submission_models.py +157 -0
- dirac_cwl/transformation/__init__.py +203 -0
- dirac_cwl-1.0.2.dist-info/METADATA +285 -0
- dirac_cwl-1.0.2.dist-info/RECORD +32 -0
- dirac_cwl-1.0.2.dist-info/WHEEL +5 -0
- dirac_cwl-1.0.2.dist-info/entry_points.txt +8 -0
- dirac_cwl-1.0.2.dist-info/licenses/LICENSE +674 -0
- dirac_cwl-1.0.2.dist-info/top_level.txt +1 -0
dirac_cwl/__init__.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""DIRAC CWL Proto - Common Workflow Language integration for DIRAC."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
|
|
8
|
+
from dirac_cwl.job import app as job_app
|
|
9
|
+
from dirac_cwl.production import app as production_app
|
|
10
|
+
from dirac_cwl.transformation import app as transformation_app
|
|
11
|
+
|
|
12
|
+
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s | %(name)s | %(levelname)s | %(message)s")
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
__version__ = version("dirac-cwl")
|
|
16
|
+
except PackageNotFoundError:
|
|
17
|
+
# package is not installed
|
|
18
|
+
pass
|
|
19
|
+
|
|
20
|
+
app = typer.Typer()
|
|
21
|
+
|
|
22
|
+
# Add sub-apps
|
|
23
|
+
app.add_typer(production_app, name="production")
|
|
24
|
+
app.add_typer(transformation_app, name="transformation")
|
|
25
|
+
app.add_typer(job_app, name="job")
|
|
26
|
+
|
|
27
|
+
if __name__ == "__main__":
|
|
28
|
+
app()
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Core base classes for workflow processing commands."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CommandBase(ABC):
    """Abstract base for pre/post-processing commands.

    Do **not** derive new commands from this class directly; subclass the
    interface classes :class:`dirac_cwl.commands.base.PreProcessCommand` and
    :class:`dirac_cwl.commands.base.PostProcessCommand` instead.
    """

    @abstractmethod
    def execute(self, job_path: Path, **kwargs) -> None:
        """Run the command inside the given job working directory.

        :param job_path: Path to the job working directory.
        :param kwargs: Additional keyword arguments.
        :raises NotImplementedError: Subclasses must override this method.
        """
        raise NotImplementedError("This method should be implemented by child class")
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class PreProcessCommand(CommandBase):
    """Interface class for pre-processing commands.

    Every pre-processing command must inherit this class. Used for type validation.
    The body is intentionally empty: the class only serves as an ``isinstance``
    target distinguishing pre-processing from post-processing commands.
    """
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class PostProcessCommand(CommandBase):
    """Interface class for post-processing commands.

    Every post-processing command must inherit this class. Used for type validation.
    The body is intentionally empty: the class only serves as an ``isinstance``
    target distinguishing post-processing from pre-processing commands.
    """
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Example pre-processing command that downloads configuration."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from dirac_cwl.commands import PreProcessCommand
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DownloadConfig(PreProcessCommand):
    """Example command that creates a file named 'content.cfg'."""

    def execute(self, job_path, **kwargs):
        """Write the example configuration file into the job directory.

        :param job_path: Path to the job working directory.
        :param kwargs: Additional keyword arguments (unused).
        """
        content = """\
This is an example
"""
        file_path = os.path.join(job_path, "content.cfg")
        # Explicit encoding keeps the output independent of the platform's
        # default locale (consistent with the other commands in this package).
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Example post-processing command that groups output files."""
|
|
2
|
+
|
|
3
|
+
import glob
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from dirac_cwl.commands import PostProcessCommand
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GroupOutputs(PostProcessCommand):
    """Example command that merges all of the outputs in a singular file."""

    def execute(self, job_path, **kwargs):
        """Concatenate matching output files into a single 'group.out' file.

        :param job_path: Path to the job working directory.
        :param kwargs: Additional keyword arguments (unused).
        """
        grouped_outputs = "group.out"
        output_path = os.path.join(job_path, grouped_outputs)
        output_patterns = ["*.out", "*.txt"]

        # `out_file` is the aggregate being written; `in_file` is each source.
        with open(output_path, "w", encoding="utf-8") as out_file:
            for pattern in output_patterns:
                for file in glob.glob(os.path.join(job_path, pattern)):
                    # Skip the aggregate file itself. Compare basenames: a
                    # raw string compare can be defeated by "/" vs os.sep
                    # differences between the glob result and os.path.join.
                    if os.path.basename(file) == grouped_outputs:
                        continue

                    with open(file, "r", encoding="utf-8") as in_file:
                        out_file.write(f"############ {file}\n")
                        out_file.writelines(in_file.readlines())
                        out_file.write("\n")
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Dirac-cwl core package."""
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Utility functions for file catalog operations."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from cwl_utils.parser.cwl_v1_2 import (
|
|
7
|
+
File,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _catalog_lfn(file_obj: File) -> "Path | None":
    """Return the LFN path of *file_obj* if its location uses the ``lfn:`` scheme.

    :param file_obj: A CWL ``File`` object.
    :return: The LFN as a :class:`Path`, or ``None`` when the file has no
        location or a non-``lfn:`` location.
    :raises NotImplementedError: If neither ``location`` nor ``path`` is set.
    """
    if not file_obj.location and not file_obj.path:
        raise NotImplementedError("File location is not defined.")
    # Only keep files that live in the File Catalog (lfn: scheme);
    # locally-pathed files carry no LFN.
    if file_obj.location and file_obj.location.startswith("lfn:"):
        return Path(file_obj.location)
    return None


def get_lfns(input_data: dict[str, Any]) -> dict[str, list[Path]]:
    """Get the list of LFNs in the inputs from the parameters.

    Only ``File`` entries whose location carries the ``lfn:`` scheme are
    collected; non-``File`` values are ignored.

    :param input_data: The parameters of the job.
    :return: Mapping of input name to the list of LFN paths.
    :raises NotImplementedError: If a ``File`` has neither location nor path.
    """
    files: dict[str, list[Path]] = {}
    for input_name, input_value in input_data.items():
        if isinstance(input_value, list):
            lfns: list[Path] = []
            for item in input_value:
                if not isinstance(item, File):
                    continue
                lfn = _catalog_lfn(item)
                if lfn is not None:
                    lfns.append(lfn)
            # List inputs always record a key, even when no LFN was found.
            files[input_name] = lfns
        elif isinstance(input_value, File):
            lfn = _catalog_lfn(input_value)
            if not input_value.location:
                # NOTE(review): a single File with only a local path produces
                # no entry at all, unlike the list branch which records an
                # empty list — preserved as-is; confirm the asymmetry is
                # intended.
                continue
            files[input_name] = [lfn] if lfn is not None else []
    return files
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Mock DIRAC data manager for local file storage operations."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from DIRAC.DataManagementSystem.Client.DataManager import DataManager # type: ignore[import-untyped]
|
|
6
|
+
from DIRAC.Resources.Storage.FileStorage import FileStorage # type: ignore[import-untyped]
|
|
7
|
+
from DIRACCommon.Core.Utilities.ReturnValues import S_ERROR, S_OK, returnSingleResult # type: ignore[import-untyped]
|
|
8
|
+
|
|
9
|
+
from dirac_cwl.data_management_mocks.file_catalog import LocalFileCatalog
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MockDataManager(DataManager):
    """Mock DIRAC DataManager backed by a local directory tree."""

    def __init__(self):
        """Set up a local FileStorage SE and a local file catalog."""
        self.base_storage_path = "filecatalog"
        self.storage_element = FileStorage("local", {"Path": self.base_storage_path})
        self.fileCatalog = LocalFileCatalog()

    @staticmethod
    def _strip_lfn_prefix(lfn) -> str:
        """Drop any 'lfn:'/'LFN:' scheme and a single leading '/' from *lfn*."""
        return str(lfn).removeprefix("lfn:").removeprefix("LFN:").removeprefix("/")

    def getFile(self, lfn, destinationDir=".", sourceSE=None, diskOnly=False):
        """Get local copy of LFN(s) from Storage Elements.

        :param mixed lfn: a single LFN or list of LFNs.
        :param str destinationDir: directory into which the file(s) are
            downloaded (default: current working directory).
        :param str sourceSE: source SE from which to download (Default: all replicas will be attempted).
        :param bool diskOnly: chooses the disk ONLY replica(s). (Default: False)
        :return: S_OK({"Successful": {}, "Failed": {}})/S_ERROR(errMessage).
        """
        if isinstance(lfn, str):
            lfns = [lfn]
        elif isinstance(lfn, list):
            lfns = lfn
        else:
            return S_ERROR(f"wrong type for lfn: {lfn}, expected str or list[str]")

        # Default to the local storage element when no source is given.
        source = sourceSE if sourceSE else self.storage_element

        successful = {}
        failed = {}
        for entry in lfns:
            local_name = str(Path(self.base_storage_path) / self._strip_lfn_prefix(entry))
            res = source.getFile(local_name, destinationDir)
            if not res["OK"]:
                failed[entry] = res["Message"]
            elif res["Value"]["Failed"]:
                failed[entry] = res["Value"]["Failed"]
            else:
                successful[entry] = str(Path(destinationDir).resolve() / Path(entry).name)
        return S_OK({"Successful": successful, "Failed": failed})

    def putAndRegister(
        self,
        lfn,
        fileName,
        diracSE,
        guid=None,
        path=None,
        checksum=None,
        overwrite=None,
    ):
        """Put a local file to a Storage Element and register in the File Catalogues.

        :param lfn: The file LFN.
        :param fileName: The full path to the local file.
        :param diracSE: The Storage Element to which to put the file.
        :param guid: The guid with which the file is to be registered (if not provided will be generated).
        :param path: The path on the storage where the file will be put (if not provided the LFN will be used).
        :param checksum: File checksum (optional).
        :param overwrite: Removes file from the file catalogue and SE before attempting upload.
        """
        # Register first, then upload (mock: the addFile result is ignored).
        self.fileCatalog.addFile(lfn)
        return self.put(lfn, fileName, diracSE, path)

    def put(self, lfn, fileName, diracSE, path=None):
        """Put a local file to a Storage Element.

        :param str lfn: LFN.
        :param str fileName: The full path to the local file.
        :param str diracSE: The Storage Element to which to put the file.
        :param str path: The path on the storage where the file will be put (if not provided the LFN will be used).
        :return: S_OK({"Successful": {...}, "Failed": {...}})/S_ERROR(errMessage).
        """
        se = self.storage_element
        if not se:
            return S_ERROR("No Storage Element defined")
        target = path if path else self._strip_lfn_prefix(lfn)
        dest = str(Path(self.base_storage_path) / Path(target))
        res = returnSingleResult(se.putFile({dest: fileName}))
        if res["OK"]:
            return S_OK({"Successful": {lfn: res["Value"]}, "Failed": {}})
        return S_OK({"Successful": {}, "Failed": {lfn: res["Message"]}})
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Mock file catalog implementations for local testing."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from DIRAC import S_ERROR, S_OK # type: ignore[import-untyped]
|
|
8
|
+
from DIRAC.Resources.Catalog.FileCatalog import FileCatalog # type: ignore[import-untyped]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class InMemoryFileCatalog(FileCatalog):
    """Minimal in-memory FileCatalog compatible with DIRAC DataManager."""

    def __init__(self, catalogs=None, vo=None):
        """Create an empty in-memory catalog.

        :param catalogs: Catalog configuration (unused).
        :param vo: Virtual organization (unused).
        """
        self._eligibleCatalogs = {}
        # Maps LFN -> {"LFN": ..., "Metadata": ...}
        self._files = {}
        # Deliberately bypass FileCatalog.__init__ and call its parent's
        # initializer instead (avoids DIRAC service setup in the mock).
        super(FileCatalog, self).__init__()

    def _getEligibleCatalogs(self):
        """Return the mock catalog configuration.

        :return: S_OK with catalog configuration.
        """
        self._eligibleCatalogs = {"MyMockCatalog": {"Type": "MockFileCatalog", "Backend": "Memory"}}
        return S_OK(self._eligibleCatalogs)

    def findFile(self, lfn):
        """Look up a file in the catalog by LFN.

        :param lfn: Logical file name.
        :return: S_OK with file metadata or S_ERROR if not found.
        """
        try:
            return S_OK([self._files[lfn]])
        except KeyError:
            return S_ERROR(f"File {lfn} not found")

    def addFile(self, lfn, metadata=None):
        """Register a file in the catalog.

        :param lfn: Logical file name.
        :param metadata: Optional file metadata.
        :return: S_OK with LFN or S_ERROR if file already exists.
        """
        if lfn in self._files:
            return S_ERROR(f"File {lfn} already exists")
        self._files[lfn] = {"LFN": lfn, "Metadata": metadata or {}}
        return S_OK(lfn)
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class LocalFileCatalog(FileCatalog):
    """File catalog implementation using local filesystem storage.

    Metadata is persisted as a single JSON document under
    ``filecatalog/metadata.json``.
    """

    def __init__(self, catalogs=None, vo=None):
        """Initialize the local file catalog.

        :param catalogs: Catalog configuration (unused).
        :param vo: Virtual organization (unused).
        """
        self._eligibleCatalogs = {"MyMockCatalog": {"Type": "MockFileCatalog", "Backend": "LocalFileSystem"}}
        self._metadataPath = "filecatalog/metadata.json"
        # Deliberately bypass FileCatalog.__init__ and call its parent's
        # initializer instead (avoids DIRAC service setup in the mock).
        super(FileCatalog, self).__init__()

    def _getEligibleCatalogs(self):
        """Get eligible catalogs for this file catalog.

        :return: S_OK with catalog configuration.
        """
        return S_OK(self._eligibleCatalogs)

    def getFileMetadata(self, lfn):
        """Get metadata for a file.

        :param lfn: Logical file name.
        :return: S_OK with metadata dict or failed dict.
        """
        metaAll = self._getAllMetadata()
        if lfn not in metaAll:
            return S_OK({"Successful": {}, "Failed": {lfn: f"File {lfn} not found"}})
        return S_OK({"Successful": {lfn: metaAll[lfn]}, "Failed": {}})

    def addFile(self, lfn):
        """Add a file to the catalog.

        :param lfn: Logical file name.
        :return: S_OK with success/failed dict or S_ERROR if file exists.
        """
        if lfn in self._getAllMetadata():
            return S_ERROR(f"File {lfn} already exists")
        self.setMetadata(lfn, {"CreationDate": time.time()})
        return S_OK({"Successful": {lfn: True}, "Failed": {}})

    def setMetadata(self, lfn, metadataDict):
        """Set metadata for a file.

        :param lfn: Logical file name.
        :param metadataDict: Metadata dictionary to set.
        :return: S_OK with success/failed dict or S_ERROR on failure.
        """
        meta = self._getAllMetadata()
        meta[lfn] = metadataDict

        try:
            self._setAllMetadata(meta)
        except Exception as e:
            # Message fixed: previously read "Could set metadata".
            return S_ERROR(f"Could not set metadata: {e}")
        return S_OK({"Successful": {lfn: True}, "Failed": {}})

    def _getAllMetadata(self):
        """Load all metadata from the local JSON file.

        :return: Dictionary of all file metadata (empty if missing or corrupt).
        """
        try:
            with open(self._metadataPath, "r") as file:
                return json.load(file)
        except (OSError, json.JSONDecodeError):
            # A missing or unreadable store is treated as an empty catalog;
            # unrelated errors now propagate instead of being swallowed.
            return {}

    def _setAllMetadata(self, metadata):
        """Save all metadata to the local JSON file.

        :param metadata: Dictionary of file metadata to save.
        """
        Path(self._metadataPath).parent.mkdir(parents=True, exist_ok=True)
        # Plain "w" is sufficient here; the original "w+" read mode was unused.
        with open(self._metadataPath, "w") as file:
            json.dump(metadata, file)
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Mock DIRAC sandbox store client for local file operations."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import tarfile
|
|
7
|
+
import tempfile
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Literal, Sequence
|
|
10
|
+
|
|
11
|
+
import zstandard
|
|
12
|
+
from diracx.core.models import SandboxInfo
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)

# Name of the hashlib algorithm used for sandbox checksums.
SANDBOX_CHECKSUM_ALGORITHM = "sha256"
# Compression suffix advertised in the sandbox format string ("tar.zst").
SANDBOX_COMPRESSION: Literal["zst"] = "zst"

# Get the project root directory (where pyproject.toml is located)
PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
# Local directory acting as the sandbox store.
SANDBOX_STORE_DIR = PROJECT_ROOT / "sandboxstore"
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def create_sandbox(paths: Sequence[str | Path]):
    """Upload a sandbox archive to the sandboxstore.

    Builds a zstd-compressed tar archive of *paths*, derives the PFN from its
    checksum, and stores that same archive under the PFN.

    :param paths: File paths to be uploaded in the sandbox.
    :return: The sandbox PFN.
    """
    with tempfile.TemporaryFile(mode="w+b") as tar_fh:
        # Create zstd compressed tar with level 18 and long matching enabled
        compression_params = zstandard.ZstdCompressionParameters.from_level(18, enable_ldm=1)
        cctx = zstandard.ZstdCompressor(compression_params=compression_params)
        with cctx.stream_writer(tar_fh, closefd=False) as compressor:
            with tarfile.open(fileobj=compressor, mode="w|") as tf:
                for path in paths:
                    if isinstance(path, str):
                        path = Path(path)
                    logger.debug("Adding %s to sandbox as %s", path.resolve(), path.name)
                    tf.add(path.resolve(), path.name, recursive=True)
        tar_fh.seek(0)

        # Generate sandbox checksum
        hasher = getattr(hashlib, SANDBOX_CHECKSUM_ALGORITHM)()
        while data := tar_fh.read(512 * 1024):
            hasher.update(data)
        checksum = hasher.hexdigest()
        tar_fh.seek(0)
        logger.debug("Sandbox checksum is %s", checksum)

        # Store sandbox info
        sandbox_info = SandboxInfo(
            checksum_algorithm=SANDBOX_CHECKSUM_ALGORITHM,
            checksum=checksum,
            size=os.stat(tar_fh.fileno()).st_size,
            format=f"tar.{SANDBOX_COMPRESSION}",
        )

        # Create PFN
        pfn = f"{sandbox_info.checksum_algorithm}:{sandbox_info.checksum}.{sandbox_info.format}"
        logger.debug("Sandbox PFN is %s", pfn)

        # Create sandbox in sandboxstore
        SANDBOX_STORE_DIR.mkdir(exist_ok=True)
        sandbox_path = SANDBOX_STORE_DIR / pfn
        if not sandbox_path.exists():
            # BUG FIX: the stored file used to be re-created as a *gzip* tar of
            # the input paths, so its bytes never matched the checksum and the
            # "tar.zst" format advertised in the PFN. Store the zstd archive
            # whose checksum was just computed instead.
            with open(sandbox_path, "wb") as sandbox_fh:
                while data := tar_fh.read(512 * 1024):
                    sandbox_fh.write(data)
            logger.debug("Sandbox uploaded for %s", pfn)
        else:
            logger.debug("Sandbox already exists for %s", pfn)
    return pfn
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def download_sandbox(pfn: str, destination: Path):
    """Fetch a sandbox archive from the sandboxstore and unpack it.

    :param pfn: Sandbox PFN
    :param destination: Destination directory
    """
    logger.debug("Retrieving sandbox for %s", pfn)
    archive_path = SANDBOX_STORE_DIR / pfn
    # Use the "data" filter to reject unsafe members during extraction.
    with tarfile.open(archive_path) as archive:
        archive.extractall(path=Path(destination), filter="data")
    logger.debug("Extracted %s to %s", pfn, destination)
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Enhanced metadata registry for DIRAC CWL integration.
|
|
2
|
+
|
|
3
|
+
This module provides a comprehensive plugin system for metadata models,
|
|
4
|
+
supporting virtual organization-specific extensions and automatic discovery.
|
|
5
|
+
|
|
6
|
+
The module maintains backward compatibility with the original API while
|
|
7
|
+
providing enhanced functionality through the new plugin system.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .core import (
|
|
11
|
+
ExecutionHooksBasePlugin,
|
|
12
|
+
ExecutionHooksHint,
|
|
13
|
+
SchedulingHint,
|
|
14
|
+
TransformationExecutionHooksHint,
|
|
15
|
+
)
|
|
16
|
+
from .registry import (
|
|
17
|
+
ExecutionHooksPluginRegistry,
|
|
18
|
+
discover_plugins,
|
|
19
|
+
get_registry,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Initialize the registry and discover core plugins
|
|
23
|
+
_registry = get_registry()
|
|
24
|
+
|
|
25
|
+
# Auto-discover plugins on import
|
|
26
|
+
try:
|
|
27
|
+
discover_plugins()
|
|
28
|
+
except Exception:
|
|
29
|
+
# Fail silently if plugin discovery fails during import
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
# Core metadata and plugins
|
|
34
|
+
"ExecutionHooksHint",
|
|
35
|
+
"TransformationExecutionHooksHint",
|
|
36
|
+
"ExecutionHooksBasePlugin",
|
|
37
|
+
"SchedulingHint",
|
|
38
|
+
"ExecutionHooksPluginRegistry",
|
|
39
|
+
"get_registry",
|
|
40
|
+
]
|