dirac-cwl 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dirac_cwl/__init__.py +28 -0
- dirac_cwl/commands/__init__.py +5 -0
- dirac_cwl/commands/core.py +37 -0
- dirac_cwl/commands/download_config.py +22 -0
- dirac_cwl/commands/group_outputs.py +32 -0
- dirac_cwl/core/__init__.py +1 -0
- dirac_cwl/core/exceptions.py +5 -0
- dirac_cwl/core/utility.py +41 -0
- dirac_cwl/data_management_mocks/data_manager.py +99 -0
- dirac_cwl/data_management_mocks/file_catalog.py +132 -0
- dirac_cwl/data_management_mocks/sandbox.py +89 -0
- dirac_cwl/execution_hooks/__init__.py +40 -0
- dirac_cwl/execution_hooks/core.py +342 -0
- dirac_cwl/execution_hooks/plugins/__init__.py +16 -0
- dirac_cwl/execution_hooks/plugins/core.py +58 -0
- dirac_cwl/execution_hooks/registry.py +209 -0
- dirac_cwl/job/__init__.py +249 -0
- dirac_cwl/job/job_wrapper.py +375 -0
- dirac_cwl/job/job_wrapper_template.py +56 -0
- dirac_cwl/job/submission_clients.py +166 -0
- dirac_cwl/modules/crypto.py +96 -0
- dirac_cwl/modules/pi_gather.py +41 -0
- dirac_cwl/modules/pi_simulate.py +33 -0
- dirac_cwl/production/__init__.py +200 -0
- dirac_cwl/submission_models.py +157 -0
- dirac_cwl/transformation/__init__.py +203 -0
- dirac_cwl-1.0.2.dist-info/METADATA +285 -0
- dirac_cwl-1.0.2.dist-info/RECORD +32 -0
- dirac_cwl-1.0.2.dist-info/WHEEL +5 -0
- dirac_cwl-1.0.2.dist-info/entry_points.txt +8 -0
- dirac_cwl-1.0.2.dist-info/licenses/LICENSE +674 -0
- dirac_cwl-1.0.2.dist-info/top_level.txt +1 -0

dirac_cwl/transformation/__init__.py
@@ -0,0 +1,203 @@

"""CLI interface to run a workflow as a transformation."""

import glob
import logging
import os
import time
from pathlib import Path
from typing import Dict, List, Optional

import typer
from cwl_utils.pack import pack
from cwl_utils.parser import load_document
from cwl_utils.parser.cwl_v1_2 import File
from rich import print_json
from rich.console import Console
from schema_salad.exceptions import ValidationException

from dirac_cwl.execution_hooks import (
    TransformationExecutionHooksHint,
)
from dirac_cwl.job import submit_job_router
from dirac_cwl.submission_models import (
    JobInputModel,
    JobSubmissionModel,
    TransformationSubmissionModel,
)

app = typer.Typer()
console = Console()


# -----------------------------------------------------------------------------
# dirac-cli commands
# -----------------------------------------------------------------------------


@app.command("submit")
def submit_transformation_client(
    task_path: str = typer.Argument(..., help="Path to the CWL file"),
    # Specific parameter for the purpose of the prototype
    local: Optional[bool] = typer.Option(True, help="Run the jobs locally instead of submitting them to the router"),
):
    """
    Correspond to the dirac-cli command to submit transformations.

    This command will:
    - Validate the workflow
    - Start the transformation
    """
    os.environ["DIRAC_PROTO_LOCAL"] = "0"
    # Validate the workflow
    console.print("[blue]:information_source:[/blue] [bold]CLI:[/bold] Validating the transformation...")
    try:
        task = load_document(pack(task_path))
    except FileNotFoundError as ex:
        console.print(f"[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to load the task:\n{ex}")
        return typer.Exit(code=1)
    except ValidationException as ex:
        console.print(f"[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to validate the task:\n{ex}")
        return typer.Exit(code=1)
    console.print(f"\t[green]:heavy_check_mark:[/green] Task {task_path}")

    transformation = TransformationSubmissionModel(task=task)
    console.print("[green]:heavy_check_mark:[/green] [bold]CLI:[/bold] Transformation validated.")

    # Submit the transformation
    console.print("[blue]:information_source:[/blue] [bold]CLI:[/bold] Submitting the transformation...")
    print_json(transformation.model_dump_json(indent=4))
    if not submit_transformation_router(transformation):
        console.print("[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to run transformation.")
        return typer.Exit(code=1)
    console.print("[green]:heavy_check_mark:[/green] [bold]CLI:[/bold] Transformation done.")


# -----------------------------------------------------------------------------
# dirac-router commands
# -----------------------------------------------------------------------------


def submit_transformation_router(transformation: TransformationSubmissionModel) -> bool:
    """Execute a transformation using the router.

    If the transformation is waiting for an input from another transformation,
    it will wait for the input to be available in the "bookkeeping".

    :param transformation: The transformation to start.
    :return: True if the transformation is executed successfully, False otherwise.
    """
    logger = logging.getLogger("TransformationRouter")

    # Validate the transformation
    logger.info("Validating the transformation...")
    # Already validated by the pydantic model
    logger.info("Transformation validated!")

    # Check if the transformation is waiting for an input
    # - if there is no execution_hooks, the transformation is not waiting for an input and can go on
    # - if there is execution_hooks, the transformation is waiting for an input
    job_model_params = []

    try:
        transformation_execution_hooks = TransformationExecutionHooksHint.from_cwl(transformation.task)
    except Exception as exc:
        raise ValueError(f"Invalid DIRAC hints:\n{exc}") from exc

    if transformation_execution_hooks.configuration and transformation_execution_hooks.group_size:
        # Get the metadata class
        transformation_metadata = transformation_execution_hooks.to_runtime(transformation)

        # Build the input cwl for the jobs to submit
        logger.info("Getting the input data for the transformation...")
        input_data_dict = {}
        min_length = None
        for input_name, group_size in transformation_execution_hooks.group_size.items():
            # Get input query
            logger.info("\t- Getting input query for %s...", input_name)
            input_query = transformation_metadata.get_input_query(input_name)
            if not input_query:
                raise RuntimeError("Input query not found.")

            # Wait for the input to be available
            logger.info("\t- Waiting for input data for %s...", input_name)
            logger.debug("\t\t- Query: %s", input_query)
            logger.debug("\t\t- Group Size: %s", group_size)
            while not (inputs := _get_inputs(input_query, group_size)):
                logger.debug("\t\t- Result: %s", inputs)
                time.sleep(5)
            logger.info("\t- Input data for %s available.", input_name)
            if not min_length or len(inputs) < min_length:
                min_length = len(inputs)

            # Update the input data in the metadata
            # Only keep the first min_length inputs
            input_data_dict[input_name] = inputs[:min_length]

        # Get the JobModelParameter for each input
        job_model_params = _generate_job_model_parameter(input_data_dict)
        logger.info("Input data for the transformation retrieved!")

    logger.info("Building the jobs...")
    jobs = JobSubmissionModel(
        task=transformation.task,
        inputs=job_model_params,
    )
    logger.info("Jobs built!")

    logger.info("Submitting jobs...")
    return submit_job_router(jobs)


# -----------------------------------------------------------------------------
# Transformation management
# -----------------------------------------------------------------------------


def _get_inputs(input_query: Path | list[Path], group_size: int) -> List[List[str]]:
    """Get the input data from the input query.

    :param input_query: The input query to get the input data
    :param group_size: The number of jobs to group together in a transformation
    :return: A list of lists of paths to the input data, each inner list has length group_size
    """
    # TODO: how do we know whether a given input has already been processed?

    # Retrieve all input paths matching the query
    if isinstance(input_query, Path):
        input_paths = glob.glob(str(input_query / "*"), root_dir="filecatalog")
    else:
        input_paths = []
        for query in input_query:
            input_paths.extend(glob.glob(str(query / "*"), root_dir="filecatalog"))
    len_input_paths = len(input_paths)

    # Ensure there are enough inputs to form at least one group
    if len_input_paths < group_size:
        return []

    # Calculate the number of full groups
    num_full_groups = len_input_paths // group_size

    # Group the input paths into lists of size group_size
    input_groups = [input_paths[i * group_size : (i + 1) * group_size] for i in range(num_full_groups)]

    return input_groups


def _generate_job_model_parameter(
    input_data_dict: Dict[str, List[List[str]]],
) -> List[JobInputModel]:
    """Generate job model parameters from input data provided."""
    job_model_params = []

    input_names = list(input_data_dict.keys())
    input_data_lists = [input_data_dict[input_name] for input_name in input_names]
    grouped_input_data = [dict(zip(input_names, elements)) for elements in zip(*input_data_lists)]
    for group in grouped_input_data:
        cwl_inputs = {}
        for input_name, input_data in group.items():
            cwl_inputs[input_name] = [File(location=str(Path("lfn:") / path)) for path in input_data]

        job_model_params.append(JobInputModel(sandbox=None, cwl=cwl_inputs))

    return job_model_params
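
As an aside, the grouping performed by `_get_inputs` and `_generate_job_model_parameter` above can be made concrete with a small standalone sketch (illustration only, not part of the package; the paths and the `data` input name are made up):

```python
# Standalone illustration of the grouping logic in _get_inputs /
# _generate_job_model_parameter above; paths and input name are hypothetical.
input_paths = ["run1/f1", "run1/f2", "run1/f3", "run1/f4", "run1/f5"]
group_size = 2

# Only full groups are formed; leftover inputs wait until more data arrives.
num_full_groups = len(input_paths) // group_size  # 2
groups = [input_paths[i * group_size:(i + 1) * group_size] for i in range(num_full_groups)]
assert groups == [["run1/f1", "run1/f2"], ["run1/f3", "run1/f4"]]  # "run1/f5" is left over

# The groups of every input name are then zipped together so that each job
# receives exactly one group per input.
input_data_dict = {"data": groups}
input_names = list(input_data_dict)
grouped = [dict(zip(input_names, elements)) for elements in zip(*input_data_dict.values())]
assert grouped == [{"data": ["run1/f1", "run1/f2"]}, {"data": ["run1/f3", "run1/f4"]}]
```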

dirac_cwl-1.0.2.dist-info/METADATA
@@ -0,0 +1,285 @@

Metadata-Version: 2.4
Name: dirac-cwl
Version: 1.0.2
Summary: Prototype of CWL used as a production/job workflow language
Author: DIRAC consortium
License-Expression: GPL-3.0-only
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Science/Research
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.11
Classifier: Topic :: Scientific/Engineering
Classifier: Topic :: System :: Distributed Computing
Requires-Python: >=3.11
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: cwl-utils
Requires-Dist: cwlformat
Requires-Dist: cwltool
Requires-Dist: dirac>=9.0.0
Requires-Dist: diracx-api
Requires-Dist: lbprodrun
Requires-Dist: mypy
Requires-Dist: pydantic
Requires-Dist: pyyaml
Requires-Dist: typer
Requires-Dist: referencing>=0.30
Requires-Dist: rich
Requires-Dist: ruamel.yaml
Provides-Extra: testing
Requires-Dist: pytest>=6; extra == "testing"
Requires-Dist: pytest-mock; extra == "testing"
Dynamic: license-file

<p align="center">
  <img alt="Dirac CWL Logo" src="public/CWLDiracX.png" width="300">
</p>

# Dirac CWL Prototype

This Python prototype introduces a command-line interface (CLI) designed for the end-to-end execution of Common Workflow Language (CWL) workflows at different scales. It enables users to test CWL workflows locally and then run them as jobs, transformations, and/or productions.

## Prototype Workflow

### Local testing

Initially, the user tests the CWL workflow locally using `cwltool`. This step involves validating the workflow's structure and ensuring that it executes correctly with the provided inputs.

> - CWL task: workflow structure
> - Inputs of the task
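
The same structural check can also be scripted with `cwl_utils`, which is what the `dirac-cwl` CLI itself does before submitting (see `dirac_cwl/transformation/__init__.py` above). A minimal sketch, assuming a hypothetical workflow path:

```python
# Sketch only: validate a CWL task the same way the dirac-cwl CLI does.
# The workflow path is a hypothetical placeholder.
from cwl_utils.pack import pack
from cwl_utils.parser import load_document
from schema_salad.exceptions import ValidationException

task_path = "workflows/helloworld/helloworld_basic/description.cwl"  # placeholder
try:
    task = load_document(pack(task_path))  # pack resolves imports, load_document validates
except ValidationException as exc:
    print(f"Invalid workflow:\n{exc}")
else:
    print(f"{task_path} passed validation")
```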

Once the workflow passes local testing, the user can choose from three options for submission, depending on the requirements.

### Submission methods

1. **Submission as Dirac Jobs**: For simple workflows with a limited number of inputs, CWL tasks can be submitted as individual jobs. In this prototype, they run locally as if they were running on distributed computing resources. Additionally, users can submit the same workflow with different sets of inputs in a single request, generating multiple jobs at once (a model-level sketch of this option follows the list below).

> - CWL task
> - [inputs1, inputs2, ...]
> - Dirac description (site, priority): Dirac-specific attributes related to scheduling
> - Metadata (job type): Dirac-specific attributes related to scheduling + execution

2. **Submission as Dirac Transformations**: For workflows requiring continuous, real-time input data or large-scale execution, CWL tasks can be submitted as transformations. As new input data becomes available, jobs are automatically generated and executed. This method is ideal for ongoing data processing and scalable operations.

> - CWL task (inputs already described within it)
> - Dirac description (site, priority)
> - Metadata (job type, group size, query parameters)

3. **Submission as Dirac Productions**: For complex workflows that require multiple steps with different requirements, CWL tasks can be submitted as productions. This method allows the workflow to be split into multiple transformations, with each transformation handling a distinct step in the process. Each transformation can manage one or more jobs, enabling large-scale, multi-step execution.

> - CWL task (inputs already described within it)
> - Step Metadata (per step):
>   - Dirac description (site, priority)
>   - Metadata (job type, group size, query parameters)

## Installation (with Pixi)

This project uses [Pixi](https://pixi.sh) to manage the development environment and tasks.

1) Install Pixi (see the official docs for your platform)

2) Create and populate the environment:

```bash
pixi install
```

3) Enter the environment (optional):

```bash
pixi shell
```

That’s it. You can now run commands either inside `pixi shell` or by prefixing them with `pixi run`.

## Usage

Inside the Pixi environment:

```bash
# Either inside a shell
pixi shell

# Submit
dirac-cwl job submit <workflow_path> [--parameter-path <input_path>] [--metadata-path <metadata_path>]

dirac-cwl transformation submit <workflow_path> [--metadata-path <metadata_path>]

dirac-cwl production submit <workflow_path> [--steps-metadata-path <steps_metadata_path>]
```

Or prefix individual commands:

```bash
pixi run dirac-cwl job submit <workflow_path> --parameter-path <input_path>
```

Common tasks are defined in `pyproject.toml` and can be run with Pixi:

```bash
# Run tests
pixi run test

# Lint (mypy)
pixi run lint
```

## Using cwltool directly

To use the workflows and inputs directly with `cwltool`, you need to add the `modules` directory to the `$PATH`:

```bash
export PATH=$PATH:</path/to/dirac-cwl/src/dirac_cwl/modules>
cwltool <workflow_path> <inputs>
```

## Contribute

### Add a workflow

To add a new workflow to the project, follow these steps:

- Create a new directory under `workflows` (e.g. `workflows/helloworld`)
- Add one or more variants of the workflow in separate directories (e.g. `helloworld/helloworld_basic/description.cwl` and `helloworld/helloworld_with_inputs/description.cwl`)
- In a `type_dependencies` subdirectory, add the files required to submit a job/transformation/production from a given variant.

Directory Structure Example:

```
workflows/
└── my_new_workflow/
    ├── my_new_workflow_complete/
    |   └── description.cwl
    ├── my_new_workflow_step1/
    |   └── description.cwl
    ├── my_new_workflow_step2/
    |   └── description.cwl
    └── type_dependencies/
        ├── production/
        |   └── steps_metadata.yaml
        ├── transformation/
        |   └── metadata.yaml
        └── job/
            ├── inputs1.yaml
            └── inputs2.yaml
```

### Add a Pre/Post-processing command and a Job type

#### Add a Pre/Post-Command

A pre/post-processing command allows code to be executed before or after the workflow.

The commands should be stored in the `src/dirac_cwl/commands/` directory.

To add a new pre/post-processing command to the project, follow these steps:

- Create a class that inherits `PreProcessCommand` if it is executed before the workflow, or `PostProcessCommand` if it is executed after the workflow. In the rare case that the command can be executed in both stages, it should inherit both classes. These classes are located in `src/dirac_cwl/commands/core.py`.

- Implement the `execute` function with the actions the command is expected to perform. This function receives the job path as a string and the keyword arguments `**kwargs`, and it may raise exceptions if needed (see the sketch below).
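
A minimal sketch of such a command, assuming only the interface described above (the class name, the archive directory, and the kwargs handling are hypothetical; the real base classes live in `src/dirac_cwl/commands/core.py`):

```python
# Sketch only: a hypothetical post-processing command.
# PostProcessCommand comes from src/dirac_cwl/commands/core.py; the rest
# (class name, archived_logs directory, kwargs handling) is illustrative.
from pathlib import Path

from dirac_cwl.commands.core import PostProcessCommand


class ArchiveLogsCommand(PostProcessCommand):
    """Move *.log files produced by the job into an archive directory."""

    def execute(self, job_path: str, **kwargs):
        archive_dir = Path(job_path) / "archived_logs"
        archive_dir.mkdir(exist_ok=True)
        for log_file in Path(job_path).glob("*.log"):
            log_file.rename(archive_dir / log_file.name)
        # Raising an exception here would signal a failed post-processing step.
```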

#### Add a Job Type

Job types in `dirac_cwl` are referred to as "plugins". These plugins are created from the hints defined in a CWL file.

The Job type should be stored in the `src/dirac_cwl/execution_hooks/plugins/` directory and should appear in the `__all__` list of the `__init__.py` file.

To add a new Job type to the project, follow these steps:

- Create a class that inherits `ExecutionHooksBasePlugin` from `src/dirac_cwl/execution_hooks/core.py`.

- Import the pre-processing and post-processing commands that this Job type is going to execute.

- Inside the `__init__` function, set the `preprocess_commands` and `postprocess_commands` lists with the commands that each stage should execute. Pay special attention to the order: the commands are executed in the same order in which they appear in the lists.

In the end, it should look something like this:

```python
class JobTypeExample(ExecutionHooksBasePlugin):
    def __init__(self, **data):
        super().__init__(**data)

        # ...
        self.preprocess_commands = [PreProcessCmd1, PreProcessCmd2, PreProcessCmd3]
        self.postprocess_commands = [PostProcessCmd1, PostProcessCmd2, PostProcessCmd3]
        # ...
```

In the previous example, `PreProcessCmd1` is executed before `PreProcessCmd2`, which in turn is executed before `PreProcessCmd3`.

- Finally, for the plugin to be discoverable by the registry, it has to be declared in the `pyproject.toml` entry points under the `dirac_cwl.execution_hooks` group. For the previous example:

```toml
[project.entry-points."dirac_cwl.execution_hooks"]
# ...
JobTypeExample = "dirac_cwl.execution_hooks.plugins:JobTypeExample"
# ...
```
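
For reference, entry points declared in the `dirac_cwl.execution_hooks` group can be discovered with the standard library alone; a standalone sketch of such a lookup (the actual registry in `src/dirac_cwl/execution_hooks/registry.py` may do this differently):

```python
# Sketch only: list the plugins declared under the dirac_cwl.execution_hooks
# entry-point group. The real registry implementation may differ.
from importlib.metadata import entry_points

for ep in entry_points(group="dirac_cwl.execution_hooks"):
    plugin_cls = ep.load()  # e.g. dirac_cwl.execution_hooks.plugins:QueryBasedPlugin
    print(f"{ep.name} -> {plugin_cls}")
```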

### Add a module

If your workflow requires calling a script, you can add this script as a module. Follow these steps to properly integrate the module:

- Add the script: Place your script in the `src/dirac_cwl/modules` directory.
- Update `pyproject.toml`: Add the script to the `pyproject.toml` file to create a command-line interface (CLI) command.
- Reinstall the package: Run `pixi run pip install .` to reinstall the package and make the new script available as a command.
- Usage in CWL Workflow: Reference the command in your `description.cwl` file.

**Example**

Let’s say you have a script named `generic_command.py` located at `src/dirac_cwl/modules/generic_command.py`. Here's how you can integrate it:

- `generic_command.py` Example Script:

```python
#!/usr/bin/env python3
import typer
from rich.console import Console

app = typer.Typer()
console = Console()

@app.command()
def run_example():
    console.print("This is an example command.")

if __name__ == "__main__":
    app()
```

- Update `pyproject.toml`:

```toml
[project.scripts]
generic-command = "dirac_cwl.modules.generic_command:app"
```

- Reinstall the package with:

```bash
pixi run pip install .
```

- Reference in `description.cwl`:

```yaml
baseCommand: [generic-command]
```

### Test your changes

- Run tests via Pixi:

```bash
pixi run test
```

- Or directly:

```bash
pixi run pytest test/test_workflows.py -v
```

dirac_cwl-1.0.2.dist-info/RECORD
@@ -0,0 +1,32 @@

dirac_cwl/__init__.py,sha256=ta9RLzO5LQ9UzW1yiZ3OYY5fmQiWP2zjw0sbXvJnh6g,758
dirac_cwl/submission_models.py,sha256=5jr6ScwZm9zloaRS-k29LDQiPZHDhdQAUs1xmH1BFi4,4935
dirac_cwl/commands/__init__.py,sha256=yADJYLzbM8oYt04f0vpJ3ju1Ts2SIc5lW9M8Jle0L7Q,179
dirac_cwl/commands/core.py,sha256=vXqAMD7KGL5MStSP-JTRWCPUEmTcmj-TwIKgjHGSGpY,1232
dirac_cwl/commands/download_config.py,sha256=dOwhWMhyD2bVzO8i2uFm5nlgrG-Lh8xI-pYxTAwvWQs,633
dirac_cwl/commands/group_outputs.py,sha256=E6XTan9mQA5ux50wKx_OwBx9ZVRqO45vvJmY7Uc81z8,1133
dirac_cwl/core/__init__.py,sha256=4gQdN890Y--wTXMojmNNYg0qzPzFJSIz9Wh4cUgBVzA,30
dirac_cwl/core/exceptions.py,sha256=x_15wckaEhPE60DHPUQK0gv6zBkhSZBr6K6kZDS-1rQ,157
dirac_cwl/core/utility.py,sha256=4e6NcsQr_0O-mDHvJTTbfMiqkzSgSLtgyEDzXLsbDX8,1492
dirac_cwl/data_management_mocks/data_manager.py,sha256=QUADgjcpb60mlcvyHLOjbKpQblCHGmeoba1WLkJmMYU,4271
dirac_cwl/data_management_mocks/file_catalog.py,sha256=xpIcLcbELXuSVq-DcgAd-vbQ0SXiie9ZnkUx9FnjTGE,4544
dirac_cwl/data_management_mocks/sandbox.py,sha256=bueqArIZR6kZVVEb258Dpx445A_Odz3X_9TqoZOmnsc,3321
dirac_cwl/execution_hooks/__init__.py,sha256=DkZPSJ362opbfc0b6WbxmSa6bT-6s9rWFB2MjLV6dKk,1036
dirac_cwl/execution_hooks/core.py,sha256=vn8SI_Ow16qx_yl4YmzDSfa6sooMKgkXYpx_7MmhML8,12417
dirac_cwl/execution_hooks/registry.py,sha256=aDf0yggUvOFkOLFO--YEU3zML5TzH27_MNk6wYuHPNg,7838
dirac_cwl/execution_hooks/plugins/__init__.py,sha256=TKtfC1FUV1vRgcG0DnhoWqlyqaj2M1pysqTxh_Vlybo,324
dirac_cwl/execution_hooks/plugins/core.py,sha256=5oSB3aobAV9l4lMvta7wv3EejyXIX5kpB55X1uQguAo,1987
dirac_cwl/job/__init__.py,sha256=6ENcM9E930jSIhWBmXm_QPxOk31uduYtQ3eTirEw7xo,7809
dirac_cwl/job/job_wrapper.py,sha256=26Q5ywkmexPuxEBtOo3taZdOIwGYPRJeDT1tcunsvSM,14693
dirac_cwl/job/job_wrapper_template.py,sha256=sQ1XuKPmo6qFzRra3qZSlk8sPTgi1VTP3Q1bq67YCBk,1458
dirac_cwl/job/submission_clients.py,sha256=WrieGuHg-JRfu1c0QxMTXk6zS39rTagGEFTG04DOGWg,5528
dirac_cwl/modules/crypto.py,sha256=QS4NWQYiFyWSTBvRjm0O7OS_f6hm-_K9QfBMP6ngPX8,2574
dirac_cwl/modules/pi_gather.py,sha256=-a3SBztxi4FrMKLttpJ3JgyZ5gcRpReQSqykNpO34fs,1120
dirac_cwl/modules/pi_simulate.py,sha256=br_c8wQKrGveGrsFEPDEQggsPbd0b2J1rSW4i27lfCs,826
dirac_cwl/production/__init__.py,sha256=NvPI2UKLjFHL8a2TQGNOD4_yh6mHvn9ugZ4iWOTgOSw,6983
dirac_cwl/transformation/__init__.py,sha256=SJP8HUaH-huYphE-Gxhm4I4vVyhHze69tVSvAS007wI,8151
dirac_cwl-1.0.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
dirac_cwl-1.0.2.dist-info/METADATA,sha256=zzNL0gyaTMLSH-Uce7E0m-V50uO90B-C8fUA-Q4joPQ,10036
dirac_cwl-1.0.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
dirac_cwl-1.0.2.dist-info/entry_points.txt,sha256=vJW7SQRh8klf-hUG2SG3IbtxLpl8HucZsk44oQxRo4g,273
dirac_cwl-1.0.2.dist-info/top_level.txt,sha256=s23lab7e3RtDqMonc5OlaSymqnNpCjv2FIBwO4-P2wc,10
dirac_cwl-1.0.2.dist-info/RECORD,,

dirac_cwl-1.0.2.dist-info/entry_points.txt
@@ -0,0 +1,8 @@

[console_scripts]
crypto = dirac_cwl.modules.crypto:app
dirac-cwl = dirac_cwl:app
pi-gather = dirac_cwl.modules.pi_gather:app
pi-simulate = dirac_cwl.modules.pi_simulate:app

[dirac_cwl.execution_hooks]
QueryBasedPlugin = dirac_cwl.execution_hooks.plugins:QueryBasedPlugin