PyPI - dirac-cwl - Versions diffs - 1.0.2__py3-none-any.whl - Mend

dirac-cwl 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

dirac_cwl/__init__.py +28 -0
dirac_cwl/commands/__init__.py +5 -0
dirac_cwl/commands/core.py +37 -0
dirac_cwl/commands/download_config.py +22 -0
dirac_cwl/commands/group_outputs.py +32 -0
dirac_cwl/core/__init__.py +1 -0
dirac_cwl/core/exceptions.py +5 -0
dirac_cwl/core/utility.py +41 -0
dirac_cwl/data_management_mocks/data_manager.py +99 -0
dirac_cwl/data_management_mocks/file_catalog.py +132 -0
dirac_cwl/data_management_mocks/sandbox.py +89 -0
dirac_cwl/execution_hooks/__init__.py +40 -0
dirac_cwl/execution_hooks/core.py +342 -0
dirac_cwl/execution_hooks/plugins/__init__.py +16 -0
dirac_cwl/execution_hooks/plugins/core.py +58 -0
dirac_cwl/execution_hooks/registry.py +209 -0
dirac_cwl/job/__init__.py +249 -0
dirac_cwl/job/job_wrapper.py +375 -0
dirac_cwl/job/job_wrapper_template.py +56 -0
dirac_cwl/job/submission_clients.py +166 -0
dirac_cwl/modules/crypto.py +96 -0
dirac_cwl/modules/pi_gather.py +41 -0
dirac_cwl/modules/pi_simulate.py +33 -0
dirac_cwl/production/__init__.py +200 -0
dirac_cwl/submission_models.py +157 -0
dirac_cwl/transformation/__init__.py +203 -0
dirac_cwl-1.0.2.dist-info/METADATA +285 -0
dirac_cwl-1.0.2.dist-info/RECORD +32 -0
dirac_cwl-1.0.2.dist-info/WHEEL +5 -0
dirac_cwl-1.0.2.dist-info/entry_points.txt +8 -0
dirac_cwl-1.0.2.dist-info/licenses/LICENSE +674 -0
dirac_cwl-1.0.2.dist-info/top_level.txt +1 -0

dirac_cwl/job/submission_clients.py ADDED Viewed

@@ -0,0 +1,166 @@
+"""
+Submission client characteristics used in job client.
+This module contains functions to manage job submission to the prototype, DIRAC, and DiracX backends.
+It is not meant to be integrated into the DiracX logic itself in the future.
+"""
+from abc import ABC, abstractmethod
+from pathlib import Path
+from diracx.api.jobs import create_sandbox
+from diracx.client.aio import AsyncDiracClient
+from rich.console import Console
+from dirac_cwl.core.utility import get_lfns
+from dirac_cwl.execution_hooks import SchedulingHint
+from dirac_cwl.submission_models import JobModel, JobSubmissionModel
+console = Console()
+class SubmissionClient(ABC):
+    """Interface that every job submission backend has to implement."""
+    @abstractmethod
+    async def create_sandbox(self, isb_file_paths: list[Path]) -> str | None:
+        """
+        Upload the input sandbox files to the sandbox store.
+        :param isb_file_paths: List of input sandbox file paths
+        :return: Sandbox PFN or None
+        """
+    @abstractmethod
+    async def submit_job(self, job_submission: JobSubmissionModel) -> bool:
+        """
+        Submit a job to the backend.
+        :param job_submission: Job submission model
+        :return: Boolean outcome as reported by the concrete client
+        """
+class PrototypeSubmissionClient(SubmissionClient):
+    """Submission client for local/prototype execution."""
+    async def create_sandbox(self, isb_file_paths: list[Path]) -> str | None:
+        """
+        Store the given files through the mock sandbox implementation.
+        :param isb_file_paths: List of input sandbox file paths
+        :return: Sandbox PFN, or None when no path was given
+        """
+        # Function-scope import: the mock sandbox is only loaded when this client is used
+        from dirac_cwl.data_management_mocks.sandbox import (
+            create_sandbox as store_sandbox,
+        )
+        return store_sandbox(paths=isb_file_paths) if isb_file_paths else None
+    async def submit_job(self, job_submission: JobSubmissionModel) -> bool:
+        """
+        Pass the submission to the prototype job router.
+        :param job_submission: Job submission model
+        :return: The value returned by ``submit_job_router``
+        """
+        from dirac_cwl.job import submit_job_router
+        outcome = submit_job_router(job_submission)
+        if not outcome:
+            return outcome
+        console.print("[green]:heavy_check_mark:[/green] [bold]CLI:[/bold] Job(s) done.")
+        return outcome
+class DIRACSubmissionClient(SubmissionClient):
+    """Submission client for DIRAC/DiracX production execution."""
+    async def create_sandbox(
+        self,
+        isb_file_paths: list[Path],
+    ) -> str | None:
+        """
+        Upload the input sandbox files with the DiracX ``create_sandbox`` API.
+        :param isb_file_paths: List of input sandbox file paths
+        :return: Sandbox PFN or None
+        """
+        return await create_sandbox(isb_file_paths)
+    async def submit_job(self, job_submission: JobSubmissionModel) -> bool:
+        """
+        Convert every validated job to JDL and submit the whole batch to DiracX.
+        Each job model is dumped to ``job.json``, uploaded as its own sandbox and
+        referenced from the JDL generated for that job.
+        :param job_submission: Job submission model
+        :return: True once the JDL jobs have been submitted
+        """
+        from dirac_cwl.job import validate_jobs
+        jdls = []
+        # The generated JDL passes "job.json" as argument, so the dumped file keeps that name
+        job_submission_path = Path("job.json")
+        for job in validate_jobs(job_submission):
+            try:
+                # Dump the job model to a file
+                job_submission_path.write_text(job.model_dump_json())
+                # Convert job.json to jdl
+                console.print("\t\t[blue]:information_source:[/blue] [bold]CLI:[/bold] Converting job model to jdl...")
+                sandbox_id = await self.create_sandbox([job_submission_path])
+            finally:
+                # Never leave the temporary dump behind, even when the upload fails
+                job_submission_path.unlink(missing_ok=True)
+            jdls.append(self.convert_to_jdl(job, sandbox_id))
+        console.print("\t\t[blue]:information_source:[/blue] [bold]CLI:[/bold] Call diracx: jobs/jdl router...")
+        async with AsyncDiracClient() as api:
+            jdl_jobs = await api.jobs.submit_jdl_jobs(jdls)
+        # Built separately so no line-continuation whitespace leaks into the message
+        job_ids = ",".join(str(jdl_job.job_id) for jdl_job in jdl_jobs)
+        console.print(
+            f"\t\t[green]:information_source:[/green] [bold]CLI:[/bold] Inserted {len(jdl_jobs)} jobs with ids: {job_ids}"
+        )
+        return True
+    def convert_to_jdl(self, job: JobModel, sandbox_pfn: str) -> str:
+        """
+        Convert job model to jdl.
+        :param job: The task to execute
+        :param sandbox_pfn: The sandbox PFN
+        :return: JDL string, one attribute per line
+        """
+        jdl_lines = [
+            "Executable = dirac-cwl-exec;",
+            "Arguments = job.json;",
+        ]
+        # Not every requirement class has a coresMin attribute and the one that
+        # does is not necessarily listed first, so probe each entry defensively
+        for requirement in job.task.requirements or []:
+            cores_min = getattr(requirement, "coresMin", None)
+            if cores_min:
+                jdl_lines.append(f"NumberOfProcessors = {cores_min};")
+                break
+        jdl_lines.append("JobName = test;")
+        jdl_lines.append("OutputSandbox = {std.out, std.err};")
+        job_scheduling = SchedulingHint.from_cwl(job.task)
+        if job_scheduling.priority:
+            jdl_lines.append(f"Priority = {job_scheduling.priority};")
+        if job_scheduling.sites:
+            # NOTE(review): sites is interpolated with its default string form — confirm this renders valid JDL
+            jdl_lines.append(f"Site = {job_scheduling.sites};")
+        jdl_lines.append(f"InputSandbox = {sandbox_pfn};")
+        if job.input:
+            # Only the first "lfn:" occurrence of each entry is upper-cased
+            formatted_lfns = [
+                str(lfn).replace("lfn:", "LFN:", 1)
+                for lfns in get_lfns(job.input.cwl).values()
+                for lfn in lfns
+            ]
+            lfns_str = ", ".join(formatted_lfns)
+            if lfns_str:
+                jdl_lines.append(f"InputData = {lfns_str};")
+        return "\n".join(jdl_lines)

dirac_cwl/modules/crypto.py ADDED Viewed

@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+"""Cryptographic utility functions for CWL workflows."""
+import base64
+import codecs
+import hashlib
+from pathlib import Path
+import typer
+app = typer.Typer()
+def caesar_cipher(text: str, shift: int) -> str:
+    """Apply Caesar cipher encryption to text.
+    Only the ASCII letters a-z and A-Z are rotated, and case is preserved.
+    Every other character, including non-ASCII letters, is copied unchanged.
+    :param text: Text to encrypt.
+    :param shift: Number of positions to shift; may be negative or larger than 26.
+    :return: Encrypted text.
+    """
+    encrypted_text = []
+    for char in text:
+        if char.isascii() and char.isalpha():
+            # Rotate inside the 26-letter range that starts at the matching case
+            base = ord("a") if char.islower() else ord("A")
+            encrypted_text.append(chr((ord(char) - base + shift) % 26 + base))
+        else:
+            # Accented or non-Latin letters have no position in the a-z alphabet
+            encrypted_text.append(char)
+    return "".join(encrypted_text)
+@app.command("caesar")
+def caesar_command(input_string: str, shift_value: int):
+    """Apply Caesar cipher to the input string with a given shift."""
+    # Echo the ciphertext first, then persist it in the working directory
+    ciphertext = caesar_cipher(input_string, shift_value)
+    typer.echo(f"Caesar Cipher Result: {ciphertext}")
+    output_file = Path("caesar_result.txt")
+    output_file.write_text(ciphertext)
+def base64_encode(text: str) -> str:
+    """Return the Base64 representation of text.
+    :param text: Text to encode; it is converted to UTF-8 bytes first.
+    :return: Base64 encoded string.
+    """
+    return base64.b64encode(text.encode("utf-8")).decode("utf-8")
+@app.command("base64")
+def base64_command(input_string: str):
+    """Base64 encode the input string."""
+    # Echo the encoded text first, then persist it in the working directory
+    encoded = base64_encode(input_string)
+    typer.echo(f"Base64 Encoded Result: {encoded}")
+    output_file = Path("base64_result.txt")
+    output_file.write_text(encoded)
+def md5_hash(text: str) -> str:
+    """Return the hexadecimal MD5 digest of text.
+    :param text: Text to hash; it is converted to UTF-8 bytes first.
+    :return: MD5 hash string.
+    """
+    digest = hashlib.md5(text.encode("utf-8"))
+    return digest.hexdigest()
+@app.command("md5")
+def md5_command(input_string: str):
+    """Compute the MD5 hash of the input string."""
+    # Echo the digest first, then persist it in the working directory
+    digest = md5_hash(input_string)
+    typer.echo(f"MD5 Hash Result: {digest}")
+    output_file = Path("md5_result.txt")
+    output_file.write_text(digest)
+def rot13_encrypt(text: str) -> str:
+    """Return text transformed by the ``rot_13`` codec.
+    :param text: Text to encrypt.
+    :return: ROT13 encrypted string.
+    """
+    return codecs.encode(text, "rot_13")
+@app.command("rot13")
+def rot13_command(input_string: str):
+    """Apply ROT13 encryption to the input string."""
+    # Echo the rotated text first, then persist it in the working directory
+    rotated = rot13_encrypt(input_string)
+    typer.echo(f"ROT13 Result: {rotated}")
+    output_file = Path("rot13_result.txt")
+    output_file.write_text(rotated)
+# Run the Typer application only when this module is executed as a script
+if __name__ == "__main__":
+    app()

dirac_cwl/modules/pi_gather.py ADDED Viewed

@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+"""Pi estimation gathering module using Monte Carlo results."""
+import math
+from typing import List
+import typer
+from rich.console import Console
+app = typer.Typer()
+console = Console()
+@app.command()
+def process(files: List[str] = typer.Argument(..., help="Paths to the input files")):
+    """Process the input points and estimate the value of Pi using the Monte Carlo method."""
+    inside_circle = 0
+    total_points = 0
+    # Gather the points of every input file, not only the first one
+    for file_path in files:
+        with open(file_path, "r") as f:
+            for line in f:
+                # Skip blank lines, e.g. an empty last line
+                if not line.strip():
+                    continue
+                x, y = map(float, line.split())
+                # Count the point when it falls within the unit circle
+                if math.sqrt(x**2 + y**2) <= 1:
+                    inside_circle += 1
+                total_points += 1
+    # Without any point the ratio below is undefined: fail with a clear message
+    if total_points == 0:
+        console.print("[red]No input points found: cannot estimate Pi.[/red]")
+        raise typer.Exit(code=1)
+    # Estimate Pi
+    pi_estimate = 4 * (inside_circle / total_points)
+    # Write the result to a file
+    output_name = "result_final.sim"
+    with open(output_name, "w") as f:
+        f.write(f"Approximation of Pi: {pi_estimate}\n")
+    console.print(f"Pi approximation: [bold yellow]{pi_estimate}[/bold yellow]")
+    return output_name
+# Run the Typer application only when this module is executed as a script
+if __name__ == "__main__":
+    app()

dirac_cwl/modules/pi_simulate.py ADDED Viewed

@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+"""Monte Carlo simulation module for Pi estimation."""
+import random
+import typer
+from rich.console import Console
+app = typer.Typer()
+console = Console()
+@app.command()
+def simulate(num_points: int = typer.Argument(..., help="Number of random points to generate")):
+    """Simulate random points inside a square (Monte Carlo method)."""
+    # Draw all (x, y) pairs from random.uniform(-1, 1) before opening the output file
+    samples = [(random.uniform(-1, 1), random.uniform(-1, 1)) for _ in range(num_points)]
+    # One "x y" line per point
+    output_path = "result.sim"
+    with open(output_path, "w") as out:
+        out.writelines(f"{x} {y}\n" for x, y in samples)
+    console.print(f"Generated [bold green]{num_points}[/bold green] random points.")
+    return output_path
+# Run the Typer application only when this module is executed as a script
+if __name__ == "__main__":
+    app()

dirac_cwl/production/__init__.py ADDED Viewed

@@ -0,0 +1,200 @@
+"""CLI interface to run a workflow as a production."""
+import logging
+import os
+from concurrent.futures import ThreadPoolExecutor
+from typing import List, Optional
+import typer
+from cwl_utils.pack import pack
+from cwl_utils.parser import load_document
+from cwl_utils.parser.cwl_v1_2 import (
+    CommandLineTool,
+    ExpressionTool,
+    Workflow,
+    WorkflowInputParameter,
+    WorkflowStep,
+)
+from rich import print_json
+from rich.console import Console
+from schema_salad.exceptions import ValidationException
+from dirac_cwl.submission_models import (
+    ProductionSubmissionModel,
+    TransformationSubmissionModel,
+)
+from dirac_cwl.transformation import (
+    submit_transformation_router,
+)
+app = typer.Typer()
+console = Console()
+# -----------------------------------------------------------------------------
+# dirac-cli commands
+# -----------------------------------------------------------------------------
+@app.command("submit")
+def submit_production_client(
+    task_path: str = typer.Argument(..., help="Path to the CWL file"),
+    # Specific parameter for the purpose of the prototype
+    local: Optional[bool] = typer.Option(True, help="Run the job locally instead of submitting it to the router"),
+):
+    """
+    Correspond to the dirac-cli command to submit productions.
+    This command will:
+    - Validate the workflow
+    - Start the production
+    """
+    # NOTE(review): `local` is never read and the variable below is always set to "0" — confirm this is intended
+    os.environ["DIRAC_PROTO_LOCAL"] = "0"
+    # Validate the workflow
+    console.print("[blue]:information_source:[/blue] [bold]CLI:[/bold] Validating the production...")
+    try:
+        task = load_document(pack(task_path))
+    except FileNotFoundError as ex:
+        console.print(f"[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to load the task:\n{ex}")
+        # typer.Exit is an exception: it has to be raised for the exit code to take effect
+        raise typer.Exit(code=1) from ex
+    except ValidationException as ex:
+        console.print(f"[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to validate the task:\n{ex}")
+        raise typer.Exit(code=1) from ex
+    console.print(f"\t[green]:heavy_check_mark:[/green] Task {task_path}")
+    console.print("\t[green]:heavy_check_mark:[/green] Metadata")
+    # Create the production
+    production = ProductionSubmissionModel(task=task)
+    console.print("[green]:heavy_check_mark:[/green] [bold]CLI:[/bold] Production validated.")
+    # Submit the production
+    console.print("[blue]:information_source:[/blue] [bold]CLI:[/bold] Submitting the production...")
+    print_json(production.model_dump_json(indent=4))
+    if not submit_production_router(production):
+        console.print("[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to run production.")
+        raise typer.Exit(code=1)
+    console.print("[green]:heavy_check_mark:[/green] [bold]CLI:[/bold] Production done.")
+# -----------------------------------------------------------------------------
+# dirac-router commands
+# -----------------------------------------------------------------------------
+def submit_production_router(production: ProductionSubmissionModel) -> bool:
+    """Split a production into transformations and submit each of them.
+    :param production: The production to submit
+    :return: True if every transformation submission returned a truthy result, False otherwise
+    """
+    log = logging.getLogger("ProductionRouter")
+    # No explicit check happens here: the pydantic model has already validated the production
+    log.info("Validating the production...")
+    log.info("Production validated!")
+    # One transformation per workflow step
+    log.info("Creating transformations from production...")
+    transformations = _get_transformations(production)
+    log.info("%s transformations created!", len(transformations))
+    # Submit through a thread pool and collect every result before judging the outcome
+    log.info("Submitting transformations...")
+    with ThreadPoolExecutor() as pool:
+        outcomes = list(pool.map(submit_transformation_router, transformations))
+    return all(outcomes)
+# -----------------------------------------------------------------------------
+# Production management
+# -----------------------------------------------------------------------------
+def _get_transformations(
+    production: ProductionSubmissionModel,
+) -> List[TransformationSubmissionModel]:
+    """Build one transformation per step of the production workflow.
+    :param production: The production to create transformations from
+    :return: The transformations, in the order of the workflow steps
+    """
+    workflow = production.task
+    # Each step is turned into a standalone CWL task and wrapped in its own model
+    return [
+        TransformationSubmissionModel(
+            task=_create_subworkflow(step, str(workflow.cwlVersion), workflow.inputs),
+        )
+        for step in workflow.steps
+    ]
+def _create_subworkflow(
+    wf_step: WorkflowStep, cwlVersion: str, inputs: List[WorkflowInputParameter]
+) -> Workflow | CommandLineTool | ExpressionTool:
+    """Build a standalone CWL task from a workflow step.
+    A step whose ``run`` is a Workflow yields a new Workflow; any other step
+    yields a new CommandLineTool. Defaults of the parent workflow inputs are
+    then copied onto the task inputs that are wired to them.
+    :param wf_step: The step to create a CWL file for
+    :param cwlVersion: The CWL version to use
+    :param inputs: Inputs of the parent workflow, used as the source of default values
+    :return: The CWL subworkflow
+    """
+    # NOTE(review): every non-Workflow step is rebuilt as a CommandLineTool although
+    # the return annotation also lists ExpressionTool — confirm this is intended
+    run = wf_step.run
+    new_workflow: Workflow | CommandLineTool
+    if run.class_ == "Workflow":
+        # Handle nested workflows
+        new_workflow = Workflow(
+            cwlVersion=cwlVersion,
+            inputs=run.inputs,
+            outputs=run.outputs,
+            steps=run.steps,
+            requirements=run.requirements,
+        )
+    else:
+        # Handle command line tools
+        new_workflow = CommandLineTool(
+            cwlVersion=cwlVersion,
+            arguments=run.arguments,
+            baseCommand=run.baseCommand,
+            inputs=run.inputs,
+            outputs=run.outputs,
+            requirements=run.requirements,
+        )
+    # Add the default value to the inputs if any
+    for target in new_workflow.inputs:
+        if not target.id:
+            continue
+        # Identifiers are compared by their last segment after "#" and "/"
+        target_name = target.id.split("#")[-1].split("/")[-1]
+        for step_in in wf_step.in_:
+            # Skip if the input is not set: this should never happen
+            if not step_in.id:
+                continue
+            if step_in.id.split("#")[-1].split("/")[-1] != target_name:
+                continue
+            # Find the source input from the original workflow
+            source_param = next(
+                (param for param in inputs if param.id and param.id == step_in.source),
+                None,
+            )
+            if source_param is not None:
+                target.default = source_param.default
+                break
+    return new_workflow

dirac_cwl/submission_models.py ADDED Viewed

@@ -0,0 +1,157 @@
+"""
+Enhanced submission models for DIRAC CWL integration.
+This module provides improved submission models with proper separation of concerns,
+modern Python typing, and reST-style (``:param:`` / ``:return:``) docstrings.
+"""
+from __future__ import annotations
+from typing import Any, Optional
+from cwl_utils.parser import save
+from cwl_utils.parser.cwl_v1_2 import (
+    CommandLineTool,
+    ExpressionTool,
+    Workflow,
+)
+from pydantic import BaseModel, ConfigDict, field_serializer, model_validator
+from dirac_cwl.execution_hooks import (
+    ExecutionHooksHint,
+    SchedulingHint,
+    TransformationExecutionHooksHint,
+)
+# -----------------------------------------------------------------------------
+# Job models
+# -----------------------------------------------------------------------------
+class JobInputModel(BaseModel):
+    """Sandbox entries and CWL input values attached to one job execution."""
+    # Arbitrary (non-pydantic) types are accepted as field values
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    sandbox: list[str] | None
+    cwl: dict[str, Any]
+    @field_serializer("cwl")
+    def serialize_cwl(self, value):
+        """Convert the CWL inputs with the cwl_utils ``save`` function.
+        :param value: CWL object to serialize.
+        :return: Result of ``save(value)``.
+        """
+        serialized = save(value)
+        return serialized
+class BaseJobModel(BaseModel):
+    """Base class for Job definition."""
+    # Allow arbitrary types to be passed to the model
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    task: CommandLineTool | Workflow | ExpressionTool
+    @field_serializer("task")
+    def serialize_task(self, value):
+        """Serialize CWL task object to dictionary.
+        :param value: CWL task object to serialize.
+        :return: Serialized task dictionary.
+        :raises TypeError: If value is not a valid CWL task type.
+        """
+        if not isinstance(value, (CommandLineTool, Workflow, ExpressionTool)):
+            raise TypeError(f"Cannot serialize type {type(value)}")
+        return save(value)
+    @model_validator(mode="before")
+    @classmethod
+    def validate_hints(cls, values):
+        """Validate execution hooks and scheduling hints in the task.
+        Runs before field validation, on the raw input values.
+        :param values: Model values dictionary.
+        :return: The same values, unchanged.
+        """
+        task = values.get("task")
+        # Both hints are built only to check them; the results are discarded
+        ExecutionHooksHint.from_cwl(task)
+        SchedulingHint.from_cwl(task)
+        return values
+class JobSubmissionModel(BaseJobModel):
+    """Job definition sent to the router."""
+    # Optional list of job inputs; defaults to None
+    inputs: list[JobInputModel] | None = None
+class JobModel(BaseJobModel):
+    """Job definition sent to the job wrapper."""
+    # Optional single job input; defaults to None
+    input: JobInputModel | None = None
+# -----------------------------------------------------------------------------
+# Transformation models
+# -----------------------------------------------------------------------------
+class TransformationSubmissionModel(BaseModel):
+    """Transformation definition sent to the router."""
+    # Allow arbitrary types to be passed to the model
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    task: CommandLineTool | Workflow | ExpressionTool
+    @field_serializer("task")
+    def serialize_task(self, value):
+        """Serialize CWL task object to dictionary.
+        :param value: CWL task object to serialize.
+        :return: Serialized task dictionary.
+        :raises TypeError: If value is not a valid CWL task type.
+        """
+        if not isinstance(value, (CommandLineTool, Workflow, ExpressionTool)):
+            raise TypeError(f"Cannot serialize type {type(value)}")
+        return save(value)
+    @model_validator(mode="before")
+    @classmethod
+    def validate_hints(cls, values):
+        """Validate transformation execution hooks and scheduling hints in the task.
+        Runs before field validation, on the raw input values.
+        :param values: Model values dictionary.
+        :return: The same values, unchanged.
+        """
+        task = values.get("task")
+        # Both hints are built only to check them; the results are discarded
+        TransformationExecutionHooksHint.from_cwl(task)
+        SchedulingHint.from_cwl(task)
+        return values
+# -----------------------------------------------------------------------------
+# Production models
+# -----------------------------------------------------------------------------
+class ProductionSubmissionModel(BaseModel):
+    """Production definition sent to the router."""
+    # Allow arbitrary types to be passed to the model
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    task: Workflow
+    @field_serializer("task")
+    def serialize_task(self, value):
+        """Serialize the CWL task of the production with cwl_utils ``save``.
+        :param value: CWL workflow object to serialize.
+        :return: Serialized workflow dictionary.
+        :raises TypeError: If value is not an ExpressionTool, CommandLineTool or Workflow.
+        """
+        if not isinstance(value, (ExpressionTool, CommandLineTool, Workflow)):
+            raise TypeError(f"Cannot serialize type {type(value)}")
+        return save(value)