dirac-cwl 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dirac_cwl/__init__.py ADDED
@@ -0,0 +1,28 @@
+ """DIRAC CWL Proto - Common Workflow Language integration for DIRAC."""
+
+ import logging
+ from importlib.metadata import PackageNotFoundError, version
+
+ import typer
+
+ from dirac_cwl.job import app as job_app
+ from dirac_cwl.production import app as production_app
+ from dirac_cwl.transformation import app as transformation_app
+
+ logging.basicConfig(level=logging.DEBUG, format="%(asctime)s | %(name)s | %(levelname)s | %(message)s")
+
+ try:
+     __version__ = version("dirac-cwl")
+ except PackageNotFoundError:
+     # package is not installed
+     pass
+
+ app = typer.Typer()
+
+ # Add sub-apps
+ app.add_typer(production_app, name="production")
+ app.add_typer(transformation_app, name="transformation")
+ app.add_typer(job_app, name="job")
+
+ if __name__ == "__main__":
+     app()
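As a quick illustration (not shipped in the wheel), the composed Typer application can be exercised programmatically with Typer's own test runner:

from typer.testing import CliRunner

from dirac_cwl import app

runner = CliRunner()
# Any of the registered sub-apps works here: production, transformation, job
result = runner.invoke(app, ["job", "--help"])
print(result.output)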
dirac_cwl/commands/__init__.py ADDED
@@ -0,0 +1,5 @@
+ """Command classes for workflow pre/post-processing operations."""
+
+ from .core import PostProcessCommand, PreProcessCommand
+
+ __all__ = ["PreProcessCommand", "PostProcessCommand"]
dirac_cwl/commands/core.py ADDED
@@ -0,0 +1,37 @@
+ """Core base classes for workflow processing commands."""
+
+ from abc import ABC, abstractmethod
+ from pathlib import Path
+
+
+ class CommandBase(ABC):
+     """Base abstract class for pre/post-processing commands.
+
+     New commands **MUST NOT** inherit this class directly. Instead they should inherit the interface classes
+     :class:`dirac_cwl.commands.core.PreProcessCommand` and
+     :class:`dirac_cwl.commands.core.PostProcessCommand`.
+     """
+
+     @abstractmethod
+     def execute(self, job_path: Path, **kwargs) -> None:
+         """Execute the command in the given job path.
+
+         :param job_path: Path to the job working directory.
+         :param kwargs: Additional keyword arguments.
+         :raises NotImplementedError: This method must be implemented by subclasses.
+         """
+         raise NotImplementedError("This method should be implemented by a child class")
+
+
+ class PreProcessCommand(CommandBase):
+     """Interface class for pre-processing commands.
+
+     Every pre-processing command must inherit this class. Used for type validation.
+     """
+
+
+ class PostProcessCommand(CommandBase):
+     """Interface class for post-processing commands.
+
+     Every post-processing command must inherit this class. Used for type validation.
+     """
@@ -0,0 +1,22 @@
+ """Example pre-processing command that downloads configuration."""
+
+ import os
+
+ from dirac_cwl.commands import PreProcessCommand
+
+
+ class DownloadConfig(PreProcessCommand):
+     """Example command that creates a file named 'content.cfg'."""
+
+     def execute(self, job_path, **kwargs):
+         """Execute the configuration download.
+
+         :param job_path: Path to the job working directory.
+         :param kwargs: Additional keyword arguments.
+         """
+         content = """\
+ This is an example
+ """
+         file_path = os.path.join(job_path, "content.cfg")
+         with open(file_path, "w") as f:
+             f.write(content)
@@ -0,0 +1,32 @@
+ """Example post-processing command that groups output files."""
+
+ import glob
+ import os
+
+ from dirac_cwl.commands import PostProcessCommand
+
+
+ class GroupOutputs(PostProcessCommand):
+     """Example command that merges all of the outputs into a single file."""
+
+     def execute(self, job_path, **kwargs):
+         """Execute the output file grouping.
+
+         :param job_path: Path to the job working directory.
+         :param kwargs: Additional keyword arguments.
+         """
+         grouped_outputs = "group.out"
+         output_path = os.path.join(job_path, grouped_outputs)
+         output_patterns = ["*.out", "*.txt"]
+
+         with open(output_path, "w", encoding="utf-8") as f_out:
+             for file_pattern in output_patterns:
+                 pattern = f"{job_path}/{file_pattern}"
+                 for file in glob.glob(pattern):
+                     if file == output_path:
+                         continue
+
+                     with open(file, "r", encoding="utf-8") as f_in:
+                         f_out.write(f"############ {file}\n")
+                         f_out.writelines(f_in.readlines())
+                         f_out.write("\n")
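A usage sketch for the two example commands. The import paths below are assumptions; the module file names are not visible in this diff:

import tempfile
from pathlib import Path

# Hypothetical module names -- adjust to the actual package layout
from dirac_cwl.commands.download_config import DownloadConfig
from dirac_cwl.commands.group_outputs import GroupOutputs

with tempfile.TemporaryDirectory() as job_dir:
    DownloadConfig().execute(job_dir)                    # creates content.cfg
    (Path(job_dir) / "step1.txt").write_text("hello\n")  # pretend job output
    GroupOutputs().execute(job_dir)                      # merges *.out / *.txt into group.out
    print((Path(job_dir) / "group.out").read_text())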
@@ -0,0 +1 @@
+ """Dirac-cwl core package."""
@@ -0,0 +1,5 @@
+ """Dirac-cwl exceptions module."""
+
+
+ class WorkflowProcessingException(Exception):
+     """Workflow exception raised during pre- and post-processing steps."""
@@ -0,0 +1,41 @@
+ """Utility functions for file catalog operations."""
+
+ from pathlib import Path
+ from typing import Any
+
+ from cwl_utils.parser.cwl_v1_2 import (
+     File,
+ )
+
+
+ def get_lfns(input_data: dict[str, Any]) -> dict[str, list[Path]]:
+     """Get the list of LFNs in the inputs from the parameters.
+
+     :param input_data: The parameters of the job.
+     :return: Mapping of input name to the list of LFN paths.
+     """
+     # Get the files from the input data
+     files: dict[str, list[Path]] = {}
+     for input_name, input_value in input_data.items():
+         val = []
+         if isinstance(input_value, list):
+             for item in input_value:
+                 if isinstance(item, File):
+                     if not item.location and not item.path:
+                         raise NotImplementedError("File location is not defined.")
+
+                     if not item.location:
+                         continue
+                     # Only keep files registered in the File Catalog ("lfn:" prefix)
+                     if item.location.startswith("lfn:"):
+                         val.append(Path(item.location))
+             files[input_name] = val
+         elif isinstance(input_value, File):
+             if not input_value.location and not input_value.path:
+                 raise NotImplementedError("File location is not defined.")
+             if not input_value.location:
+                 continue
+             if input_value.location.startswith("lfn:"):
+                 val.append(Path(input_value.location))
+             files[input_name] = val
+     return files
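A sketch of how `get_lfns` filters inputs, assuming `cwl_utils` allows constructing `File` objects with only a `location` (the LFNs shown are made up):

from cwl_utils.parser.cwl_v1_2 import File

# Hypothetical job parameters: one catalog input and one local input
inputs = {
    "data": [File(location="lfn:/vo/prod/run1/hits.root")],  # LFN, kept
    "config": File(location="file:///tmp/local.cfg"),        # local file, filtered out
}
print(get_lfns(inputs))
# {'data': [PosixPath('lfn:/vo/prod/run1/hits.root')], 'config': []}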
@@ -0,0 +1,99 @@
+ """Mock DIRAC data manager for local file storage operations."""
+
+ from pathlib import Path
+
+ from DIRAC.DataManagementSystem.Client.DataManager import DataManager  # type: ignore[import-untyped]
+ from DIRAC.Resources.Storage.FileStorage import FileStorage  # type: ignore[import-untyped]
+ from DIRACCommon.Core.Utilities.ReturnValues import S_ERROR, S_OK, returnSingleResult  # type: ignore[import-untyped]
+
+ from dirac_cwl.data_management_mocks.file_catalog import LocalFileCatalog
+
+
+ class MockDataManager(DataManager):
+     """Mock DIRAC DataManager for local file storage."""
+
+     def __init__(self):
+         """Initialize the mock data manager with the local file catalog."""
+         self.base_storage_path = "filecatalog"
+         self.storage_element = FileStorage("local", {"Path": self.base_storage_path})
+         self.fileCatalog = LocalFileCatalog()
+
+     def getFile(self, lfn, destinationDir=".", sourceSE=None, diskOnly=False):
+         """Get a local copy of LFN(s) from Storage Elements.
+
+         :param mixed lfn: a single LFN or a list of LFNs.
+         :param str destinationDir: directory to which the file(s) will be
+             downloaded. (Default: current working directory.)
+         :param str sourceSE: source SE from which to download. (Default: all replicas will be attempted.)
+         :param bool diskOnly: chooses the disk-ONLY replica(s). (Default: False)
+         :return: S_OK({"Successful": {}, "Failed": {}})/S_ERROR(errMessage).
+         """
+         if isinstance(lfn, list):
+             lfns = lfn
+         elif isinstance(lfn, str):
+             lfns = [lfn]
+         else:
+             return S_ERROR(f"wrong type for lfn: {lfn}, expected str or list[str]")
+
+         if not sourceSE:
+             sourceSE = self.storage_element
+
+         success = {}
+         fail = {}
+         for lfn in lfns:
+             res = sourceSE.getFile(
+                 str(
+                     Path(self.base_storage_path) / str(lfn).removeprefix("lfn:").removeprefix("LFN:").removeprefix("/")
+                 ),
+                 destinationDir,
+             )
+             if not res["OK"]:
+                 fail[lfn] = res["Message"]
+             elif res["Value"]["Failed"]:
+                 fail[lfn] = res["Value"]["Failed"]
+             else:
+                 success[lfn] = str(Path(destinationDir).resolve() / Path(lfn).name)
+         return S_OK({"Successful": success, "Failed": fail})
+
+     def putAndRegister(
+         self,
+         lfn,
+         fileName,
+         diracSE,
+         guid=None,
+         path=None,
+         checksum=None,
+         overwrite=None,
+     ):
+         """Put a local file to a Storage Element and register it in the File Catalogues.
+
+         :param lfn: The file LFN.
+         :param fileName: The full path to the local file.
+         :param diracSE: The Storage Element to which to put the file.
+         :param guid: The guid with which the file is to be registered (if not provided, it will be generated).
+         :param path: The path on the storage where the file will be put (if not provided, the LFN will be used).
+         :param checksum: File checksum (optional).
+         :param overwrite: Removes the file from the file catalogue and SE before attempting the upload.
+         """
+         self.fileCatalog.addFile(lfn)
+         return self.put(lfn, fileName, diracSE, path)
+
+     def put(self, lfn, fileName, diracSE, path=None):
+         """Put a local file to a Storage Element.
+
+         :param str lfn: LFN.
+         :param str fileName: The full path to the local file.
+         :param str diracSE: The Storage Element to which to put the file.
+         :param str path: The path on the storage where the file will be put (if not provided, the LFN will be used).
+         :return: S_OK({"Successful": {...}, "Failed": {...}})/S_ERROR(errMessage).
+         """
+         se = self.storage_element
+         if not se:
+             return S_ERROR("No Storage Element defined")
+         if not path:
+             path = str(lfn).removeprefix("lfn:").removeprefix("LFN:").removeprefix("/")
+         dest = str(Path(self.base_storage_path) / Path(path))
+         res = returnSingleResult(se.putFile({dest: fileName}))
+         if not res["OK"]:
+             return S_OK({"Successful": {}, "Failed": {lfn: res["Message"]}})
+         return S_OK({"Successful": {lfn: res["Value"]}, "Failed": {}})
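A hypothetical round trip through the mock. The module path of `MockDataManager` is an assumption, and the `filecatalog/` store is created relative to the current working directory:

from pathlib import Path

from dirac_cwl.data_management_mocks.data_manager import MockDataManager  # assumed module name

Path("hello.txt").write_text("hello\n")
Path("downloads").mkdir(exist_ok=True)

dm = MockDataManager()
print(dm.putAndRegister("lfn:/vo/user/hello.txt", str(Path("hello.txt").resolve()), "local"))
print(dm.getFile("lfn:/vo/user/hello.txt", destinationDir="downloads"))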
dirac_cwl/data_management_mocks/file_catalog.py ADDED
@@ -0,0 +1,132 @@
+ """Mock file catalog implementations for local testing."""
+
+ import json
+ import time
+ from pathlib import Path
+
+ from DIRAC import S_ERROR, S_OK  # type: ignore[import-untyped]
+ from DIRAC.Resources.Catalog.FileCatalog import FileCatalog  # type: ignore[import-untyped]
+
+
+ class InMemoryFileCatalog(FileCatalog):
+     """Minimal in-memory FileCatalog compatible with the DIRAC DataManager."""
+
+     def __init__(self, catalogs=None, vo=None):
+         """Initialize the in-memory file catalog.
+
+         :param catalogs: Catalog configuration (unused).
+         :param vo: Virtual organization (unused).
+         """
+         self._eligibleCatalogs = {}
+         self._files = {}  # store metadata and logical file names
+         super(FileCatalog, self).__init__()
+
+     def _getEligibleCatalogs(self):
+         """Get eligible catalogs for this file catalog.
+
+         :return: S_OK with the catalog configuration.
+         """
+         self._eligibleCatalogs = {"MyMockCatalog": {"Type": "MockFileCatalog", "Backend": "Memory"}}
+         return S_OK(self._eligibleCatalogs)
+
+     def findFile(self, lfn):
+         """Find a file in the catalog by LFN.
+
+         :param lfn: Logical file name.
+         :return: S_OK with the file metadata or S_ERROR if not found.
+         """
+         if lfn in self._files:
+             return S_OK([self._files[lfn]])
+         return S_ERROR(f"File {lfn} not found")
+
+     def addFile(self, lfn, metadata=None):
+         """Add a file to the catalog.
+
+         :param lfn: Logical file name.
+         :param metadata: Optional file metadata.
+         :return: S_OK with the LFN or S_ERROR if the file already exists.
+         """
+         if lfn in self._files:
+             return S_ERROR(f"File {lfn} already exists")
+         self._files[lfn] = {"LFN": lfn, "Metadata": metadata or {}}
+         return S_OK(lfn)
+
+
+ class LocalFileCatalog(FileCatalog):
+     """File catalog implementation using local filesystem storage."""
+
+     def __init__(self, catalogs=None, vo=None):
+         """Initialize the local file catalog.
+
+         :param catalogs: Catalog configuration (unused).
+         :param vo: Virtual organization (unused).
+         """
+         self._eligibleCatalogs = {"MyMockCatalog": {"Type": "MockFileCatalog", "Backend": "LocalFileSystem"}}
+         self._metadataPath = "filecatalog/metadata.json"
+         super(FileCatalog, self).__init__()
+
+     def _getEligibleCatalogs(self):
+         """Get eligible catalogs for this file catalog.
+
+         :return: S_OK with the catalog configuration.
+         """
+         return S_OK(self._eligibleCatalogs)
+
+     def getFileMetadata(self, lfn):
+         """Get metadata for a file.
+
+         :param lfn: Logical file name.
+         :return: S_OK with a metadata dict or a failed dict.
+         """
+         metaAll = self._getAllMetadata()
+         if lfn not in metaAll:
+             return S_OK({"Successful": {}, "Failed": {lfn: f"File {lfn} not found"}})
+         return S_OK({"Successful": {lfn: metaAll[lfn]}, "Failed": {}})
+
+     def addFile(self, lfn):
+         """Add a file to the catalog.
+
+         :param lfn: Logical file name.
+         :return: S_OK with a success/failed dict or S_ERROR if the file exists.
+         """
+         if lfn in self._getAllMetadata():
+             return S_ERROR(f"File {lfn} already exists")
+         self.setMetadata(lfn, {"CreationDate": time.time()})
+         return S_OK({"Successful": {lfn: True}, "Failed": {}})
+
+     def setMetadata(self, lfn, metadataDict):
+         """Set metadata for a file.
+
+         :param lfn: Logical file name.
+         :param metadataDict: Metadata dictionary to set.
+         :return: S_OK with a success/failed dict or S_ERROR on failure.
+         """
+         meta = self._getAllMetadata()
+         meta[lfn] = metadataDict
+
+         try:
+             self._setAllMetadata(meta)
+         except Exception as e:
+             return S_ERROR(f"Could not set metadata: {e}")
+         return S_OK({"Successful": {lfn: True}, "Failed": {}})
+
+     def _getAllMetadata(self):
+         """Get all metadata from the local file.
+
+         :return: Dictionary of all file metadata.
+         """
+         try:
+             with open(self._metadataPath, "r") as file:
+                 meta = json.load(file)
+         except Exception:
+             meta = {}
+         return meta
+
+     def _setAllMetadata(self, metadata):
+         """Save all metadata to the local file.
+
+         :param metadata: Dictionary of file metadata to save.
+         """
+         Path(self._metadataPath).parent.mkdir(parents=True, exist_ok=True)
+         with open(self._metadataPath, "w+") as file:
+             json.dump(metadata, file)
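A short sketch of the catalog behaviour; entries are persisted in `filecatalog/metadata.json` relative to the working directory (the LFN is made up):

from dirac_cwl.data_management_mocks.file_catalog import LocalFileCatalog

fc = LocalFileCatalog()
print(fc.addFile("lfn:/vo/user/hello.txt"))          # S_OK with {'Successful': {...}}
print(fc.getFileMetadata("lfn:/vo/user/hello.txt"))  # metadata includes CreationDate
print(fc.addFile("lfn:/vo/user/hello.txt"))          # S_ERROR: file already exists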
@@ -0,0 +1,89 @@
+ """Mock DIRAC sandbox store client for local file operations."""
+
+ import hashlib
+ import logging
+ import os
+ import tarfile
+ import tempfile
+ from pathlib import Path
+ from typing import Literal, Sequence
+
+ import zstandard
+ from diracx.core.models import SandboxInfo
+
+ logger = logging.getLogger(__name__)
+
+ SANDBOX_CHECKSUM_ALGORITHM = "sha256"
+ SANDBOX_COMPRESSION: Literal["zst"] = "zst"
+
+ # Get the project root directory (where pyproject.toml is located)
+ PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
+ SANDBOX_STORE_DIR = PROJECT_ROOT / "sandboxstore"
+
+
+ def create_sandbox(paths: Sequence[str | Path]):
+     """Upload a sandbox archive to the sandboxstore.
+
+     :param paths: File paths to be uploaded in the sandbox.
+     """
+     with tempfile.TemporaryFile(mode="w+b") as tar_fh:
+         # Create a zstd-compressed tar with level 18 and long-distance matching enabled
+         compression_params = zstandard.ZstdCompressionParameters.from_level(18, enable_ldm=1)
+         cctx = zstandard.ZstdCompressor(compression_params=compression_params)
+         with cctx.stream_writer(tar_fh, closefd=False) as compressor:
+             with tarfile.open(fileobj=compressor, mode="w|") as tf:
+                 for path in paths:
+                     if isinstance(path, str):
+                         path = Path(path)
+                     logger.debug("Adding %s to sandbox as %s", path.resolve(), path.name)
+                     tf.add(path.resolve(), path.name, recursive=True)
+         tar_fh.seek(0)
+
+         # Generate the sandbox checksum
+         hasher = getattr(hashlib, SANDBOX_CHECKSUM_ALGORITHM)()
+         while data := tar_fh.read(512 * 1024):
+             hasher.update(data)
+         checksum = hasher.hexdigest()
+         tar_fh.seek(0)
+         logger.debug("Sandbox checksum is %s", checksum)
+
+         # Store sandbox info
+         sandbox_info = SandboxInfo(
+             checksum_algorithm=SANDBOX_CHECKSUM_ALGORITHM,
+             checksum=checksum,
+             size=os.stat(tar_fh.fileno()).st_size,
+             format=f"tar.{SANDBOX_COMPRESSION}",
+         )
+
+         # Create the PFN
+         pfn = f"{sandbox_info.checksum_algorithm}:{sandbox_info.checksum}.{sandbox_info.format}"
+         logger.debug("Sandbox PFN is %s", pfn)
+
+         # Create the sandbox in the sandboxstore
+         SANDBOX_STORE_DIR.mkdir(exist_ok=True)
+         sandbox_path = SANDBOX_STORE_DIR / pfn
+         if not sandbox_path.exists():
+             with tarfile.open(sandbox_path, "w:gz") as tar:
+                 for file in paths:
+                     if not file:
+                         break
+                     if isinstance(file, str):
+                         file = Path(file)
+                     tar.add(file, arcname=file.name)
+             logger.debug("Sandbox uploaded for %s", pfn)
+         else:
+             logger.debug("Sandbox already exists for %s", pfn)
+         return pfn
+
+
+ def download_sandbox(pfn: str, destination: Path):
+     """Retrieve a sandbox from the sandboxstore and extract it to the given destination.
+
+     :param pfn: Sandbox PFN.
+     :param destination: Destination directory.
+     """
+     logger.debug("Retrieving sandbox for %s", pfn)
+     sandbox_archive = SANDBOX_STORE_DIR / pfn
+     with tarfile.open(sandbox_archive) as tf:
+         tf.extractall(path=Path(destination), filter="data")
+     logger.debug("Extracted %s to %s", pfn, destination)
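Usage sketch (file names invented). Note one quirk visible above: the PFN advertises `tar.zst` and the checksum is computed over the zstd stream, but the archive actually written to the store is re-created with `w:gz` (gzip); `download_sandbox` then relies on `tarfile`'s transparent compression detection when reading it back:

from pathlib import Path

pfn = create_sandbox(["input.cwl", "params.yaml"])  # hypothetical input files
download_sandbox(pfn, Path("job_workdir"))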
@@ -0,0 +1,40 @@
+ """Enhanced metadata registry for DIRAC CWL integration.
+
+ This module provides a comprehensive plugin system for metadata models,
+ supporting virtual organization-specific extensions and automatic discovery.
+
+ The module maintains backward compatibility with the original API while
+ providing enhanced functionality through the new plugin system.
+ """
+
+ from .core import (
+     ExecutionHooksBasePlugin,
+     ExecutionHooksHint,
+     SchedulingHint,
+     TransformationExecutionHooksHint,
+ )
+ from .registry import (
+     ExecutionHooksPluginRegistry,
+     discover_plugins,
+     get_registry,
+ )
+
+ # Initialize the registry and discover core plugins
+ _registry = get_registry()
+
+ # Auto-discover plugins on import
+ try:
+     discover_plugins()
+ except Exception:
+     # Fail silently if plugin discovery fails during import
+     pass
+
+ __all__ = [
+     # Core metadata and plugins
+     "ExecutionHooksHint",
+     "TransformationExecutionHooksHint",
+     "ExecutionHooksBasePlugin",
+     "SchedulingHint",
+     "ExecutionHooksPluginRegistry",
+     "get_registry",
+ ]