dkist-processing-core 4.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- changelog/.gitempty +0 -0
- dkist_processing_core/__init__.py +13 -0
- dkist_processing_core/build_utils.py +139 -0
- dkist_processing_core/config.py +82 -0
- dkist_processing_core/failure_callback.py +96 -0
- dkist_processing_core/node.py +169 -0
- dkist_processing_core/resource_queue.py +9 -0
- dkist_processing_core/task.py +250 -0
- dkist_processing_core/tests/__init__.py +1 -0
- dkist_processing_core/tests/conftest.py +172 -0
- dkist_processing_core/tests/invalid_workflow_cyclic/__init__.py +1 -0
- dkist_processing_core/tests/invalid_workflow_cyclic/workflow.py +21 -0
- dkist_processing_core/tests/invalid_workflow_for_docker_multi_category/__init__.py +0 -0
- dkist_processing_core/tests/invalid_workflow_for_docker_multi_category/workflow.py +21 -0
- dkist_processing_core/tests/task_example.py +45 -0
- dkist_processing_core/tests/test_build_utils.py +128 -0
- dkist_processing_core/tests/test_export.py +71 -0
- dkist_processing_core/tests/test_failure_callback.py +90 -0
- dkist_processing_core/tests/test_node.py +156 -0
- dkist_processing_core/tests/test_task.py +82 -0
- dkist_processing_core/tests/test_workflow.py +212 -0
- dkist_processing_core/tests/valid_workflow_package/__init__.py +1 -0
- dkist_processing_core/tests/valid_workflow_package/workflow.py +21 -0
- dkist_processing_core/tests/zero_node_workflow_package/__init__.py +1 -0
- dkist_processing_core/tests/zero_node_workflow_package/workflow.py +9 -0
- dkist_processing_core/workflow.py +294 -0
- dkist_processing_core-4.3.0.dist-info/METADATA +249 -0
- dkist_processing_core-4.3.0.dist-info/RECORD +41 -0
- dkist_processing_core-4.3.0.dist-info/WHEEL +5 -0
- dkist_processing_core-4.3.0.dist-info/top_level.txt +4 -0
- docs/Makefile +134 -0
- docs/auto-proc-concept-model.png +0 -0
- docs/auto_proc_brick.png +0 -0
- docs/automated-processing-deployed.png +0 -0
- docs/changelog.rst +6 -0
- docs/conf.py +50 -0
- docs/index.rst +9 -0
- docs/landing_page.rst +34 -0
- docs/make.bat +170 -0
- docs/requirements.txt +1 -0
- licenses/LICENSE.rst +11 -0
changelog/.gitempty
ADDED
|
File without changes
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Package-level setup information."""
|
|
2
|
+
from importlib.metadata import PackageNotFoundError
|
|
3
|
+
from importlib.metadata import version
|
|
4
|
+
|
|
5
|
+
from dkist_processing_core.resource_queue import ResourceQueue
|
|
6
|
+
from dkist_processing_core.task import TaskBase
|
|
7
|
+
from dkist_processing_core.workflow import Workflow
|
|
8
|
+
|
|
9
|
+
# Resolve the installed distribution's version at import time; fall back to a
# sentinel when running from a source checkout that is not pip-installed.
try:
    __version__ = version(distribution_name=__name__)
except PackageNotFoundError:
    # package is not installed
    __version__ = "unknown"
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""Utilities for the build pipeline."""
|
|
2
|
+
import importlib
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from shutil import rmtree
|
|
5
|
+
from types import ModuleType
|
|
6
|
+
|
|
7
|
+
from dkist_processing_core import Workflow
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
__all__ = ["validate_workflows", "export_dags", "export_notebook_dockerfile", "export_notebooks"]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def validate_workflows(workflow_package: ModuleType, export_path: Path | None = None) -> None:
    """Validate that workflow engine (airflow) objects are acyclic and that exported workflows compile."""
    # When the caller gives no export path, use a temporary default directory
    # and remove it once validation is done.
    cleanup_needed = export_path is None
    if cleanup_needed:
        export_path = Path("export/")
    workflows = extract_workflows_from_package(workflow_package)
    try:
        _validate_workflows(workflows, export_path)
    finally:
        if cleanup_needed:
            rmtree(export_path)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _validate_workflows(workflows: list[Workflow], export_path: Path) -> None:
    """Validate workflows by ensuring their exported version compiles as python and that there is at least one node."""
    for workflow in workflows:
        exported_dag = workflow.export_dag(path=export_path)
        # Ensure the generated DAG file is syntactically valid python.
        source = exported_dag.read_text()
        compile(source, filename=f"{exported_dag.stem}.pyc", mode="exec")
        # An empty workflow is almost certainly a configuration mistake.
        if not workflow.nodes:
            raise ValueError(f"Workflow {workflow.workflow_name} has 0 nodes.")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def export_dags(workflow_package: ModuleType, path: str | Path) -> list[Path]:
    """Export Airflow DAG files."""
    workflows = extract_workflows_from_package(workflow_package)
    return [workflow.export_dag(path=path) for workflow in workflows]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def export_notebooks(workflow_package: ModuleType, path: str | Path) -> list[Path]:
    """Export Jupyter Notebook files."""
    workflows = extract_workflows_from_package(workflow_package)
    return [workflow.export_notebook(path=path) for workflow in workflows]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def export_notebook_dockerfile(workflow_package: ModuleType, path: str | Path) -> Path:
    """Export a dockerfile to containerize notebooks."""
    export_root = Path(path)
    notebook_paths = export_notebooks(workflow_package=workflow_package, path=export_root)
    category = extract_category_from_workflows(workflow_package=workflow_package)
    dockerfile = NotebookDockerfile(notebook_paths=notebook_paths, category=category)
    # The Dockerfile is written to the current working directory, not export_root.
    dockerfile_path = Path("Dockerfile")
    # Fail loudly (FileExistsError) rather than clobber an existing Dockerfile.
    dockerfile_path.touch(exist_ok=False)
    dockerfile_path.write_text(dockerfile.contents)
    return dockerfile_path
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def extract_category_from_workflows(workflow_package: ModuleType) -> str:
    """Extract the category from the workflows in the package to provide a unique category for the dockerfile.

    Raises
    ------
    ValueError
        If the package contains no workflows, or if its workflows span more
        than one distinct category.
    """
    workflows = extract_workflows_from_package(workflow_package)
    categories = {w.category for w in workflows}
    # Previously an empty package fell through to set.pop() and raised an
    # opaque KeyError; raise a descriptive error instead.
    if not categories:
        raise ValueError("No workflows (and therefore no categories) found in the provided package.")
    if len(categories) > 1:
        raise ValueError(
            f"Multiple categories found in provided workflows. Categories found: {categories}"
        )
    return categories.pop()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def extract_workflows_from_package(workflow_package: ModuleType) -> list[Workflow]:
    """Extract all the Workflow objects from a package."""
    # Delegate to the generic type-based extractor with the Workflow type.
    workflow_objects = extract_objects_from_package_by_type(workflow_package, Workflow)
    return workflow_objects
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def extract_objects_from_package_by_type(package: ModuleType, object_type: type) -> list:
    """Extract all objects in public modules of a given type from a package."""
    # Discover the public (non-underscore-prefixed) module names directly from
    # the package's filesystem path.
    package_dir = Path(package.__path__[0])
    module_names = [module_file.stem for module_file in package_dir.glob("[!_]*.py")]
    found = []
    for name in module_names:
        module = importlib.import_module(f".{name}", package.__name__)
        found.extend(obj for obj in vars(module).values() if isinstance(obj, object_type))
    return found
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def parse_unprotected_modules_names_from_package(package: ModuleType) -> list[str]:
    """Parse the names of all modules in a package that are not private i.e. don't begin with an underscore."""
    # The glob pattern [!_]*.py matches .py files whose names do not start
    # with an underscore; stem drops the .py suffix.
    module_files = Path(package.__path__[0]).glob("[!_]*.py")
    return [module_file.stem for module_file in module_files]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class NotebookDockerfile:
    """Build a Dockerfile for deployment as a Manual Processing Worker.

    Parameters
    ----------
    notebook_paths
        Relative paths to the exported workflow notebooks copied into the image.
    category
        Workflow category used to namespace the notebook server's base URL.

    Raises
    ------
    ValueError
        If any notebook path is absolute.
    """

    def __init__(self, notebook_paths: list[Path], category: str):
        self.notebook_paths = notebook_paths
        self.validate_notebook_paths_are_relative()
        self.category = category

    def validate_notebook_paths_are_relative(self) -> None:
        """Validate that the notebook paths are all relative.

        Raises
        ------
        ValueError
            If any notebook path is absolute.
        """
        # BUGFIX: this method previously returned a bool which the constructor
        # discarded, so absolute paths were silently accepted. It now raises.
        absolute_paths = [p for p in self.notebook_paths if p.is_absolute()]
        if absolute_paths:
            raise ValueError(
                f"Notebook paths must be relative. Absolute paths found: {absolute_paths}"
            )

    @property
    def contents(self) -> str:
        """Return the Dockerfile body."""
        return "\n".join(self.preamble + self.setup + self.notebooks + self.command)

    @property
    def preamble(self) -> list[str]:
        """Dockerfile preamble lines."""
        return ["FROM python:3.11", "ENV LANG=C.UTF-8"]

    @property
    def setup(self) -> list[str]:
        """Environment setup lines."""
        # Pin the notebook version via a constraints file so the subsequent
        # package install cannot change it.
        return [
            "COPY . /app",
            "WORKDIR /app",
            "RUN python -m pip install -U pip",
            "RUN pip install notebook",
            "RUN pip freeze | grep notebook= > constraints.txt",
            "RUN cat constraints.txt",
            "RUN python -m pip install -c constraints.txt .",
        ]

    @property
    def notebooks(self) -> list[str]:
        """Generate workflow notebooks and include in Docker container."""
        return [f"COPY {notebook_path} /notebooks/" for notebook_path in self.notebook_paths]

    @property
    def command(self) -> list[str]:
        """Run notebook server on deployment."""
        port = 8888
        return [
            f"EXPOSE {port}",
            f"CMD jupyter notebook --NotebookApp.allow_root=True --NotebookApp.base_url='/mpw-{self.category}/' --NotebookApp.ip='0.0.0.0' --NotebookApp.port={port} --MappingKernelManager.cull_idle_timeout=300 --notebook-dir=/notebooks --allow-root",
        ]
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Environment controlled configurations for dkist_processing_core."""
|
|
2
|
+
from dkist_service_configuration import MeshServiceConfigurationBase
|
|
3
|
+
from dkist_service_configuration.settings import MeshService
|
|
4
|
+
from pydantic import Field
|
|
5
|
+
from talus import ConnectionRetryerFactory
|
|
6
|
+
from talus import Exchange
|
|
7
|
+
from talus.models.connection_parameters import ConnectionParameterFactory
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DKISTProcessingCoreConfiguration(MeshServiceConfigurationBase):
    """Environment configurations for dkist_processing_core."""

    # Interservice-bus (RabbitMQ) credentials and exchange/queue settings;
    # defaults suit a local development broker.
    isb_username: str = Field(default="guest")
    isb_password: str = Field(default="guest")
    isb_exchange: str = Field(default="master.direct.x")
    isb_queue_type: str = Field(default="classic")
    # Elastic APM instrumentation settings.
    elastic_apm_service_name: str = Field(default="dkist-processing-core")
    elastic_apm_other_options: dict = Field(default_factory=dict)
    elastic_apm_enabled: bool = False
    # Version stamp for the build; presumably injected by the build pipeline — confirm.
    build_version: str = Field(default="dev")

    @property
    def isb_mesh_service(self) -> MeshService:
        """Return the mesh service details for the interservice-bus."""
        # Falls back to a local broker when the service mesh has no entry.
        return self.service_mesh_detail(
            service_name="interservice-bus",
            default_mesh_service=MeshService(mesh_address="localhost", mesh_port=5672),
        )

    @property
    def isb_producer_connection_parameters(self) -> ConnectionParameterFactory:
        """Return the connection parameters for the ISB producer."""
        return ConnectionParameterFactory(
            rabbitmq_host=self.isb_mesh_service.host,
            rabbitmq_port=self.isb_mesh_service.port,
            rabbitmq_user=self.isb_username,
            rabbitmq_pass=self.isb_password,
            connection_name="dkist-processing-core-producer",
        )

    @property
    def isb_connection_retryer(self) -> ConnectionRetryerFactory:
        """Return the connection retryer for the ISB connection."""
        # Up to 3 attempts with bounded delay and jitter between attempts.
        return ConnectionRetryerFactory(
            delay_min=1,
            delay_max=5,
            backoff=1,
            jitter_min=1,
            jitter_max=3,
            attempts=3,
        )

    @property
    def isb_queue_arguments(self) -> dict:
        """Return the queue arguments for the ISB."""
        return {
            "x-queue-type": self.isb_queue_type,
        }

    @property
    def isb_publish_exchange(self) -> Exchange:
        """Return the exchange for the ISB."""
        return Exchange(name=self.isb_exchange)

    @property
    def elastic_apm_server_url(self) -> str:
        """Return the URL for the Elastic APM server."""
        apm_server = self.service_mesh_detail(service_name="system-monitoring-log-apm")
        return f"http://{apm_server.host}:{apm_server.port}/"

    @property
    def apm_config(self) -> dict:
        """Return the configuration for the Elastic APM."""
        # elastic_apm_other_options is spread last so callers can override
        # the defaults set here.
        return {
            "SERVICE_NAME": self.elastic_apm_service_name,
            "SERVER_URL": self.elastic_apm_server_url,
            "ENVIRONMENT": "Workflows",
            **self.elastic_apm_other_options,
        }
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Module-level configuration singleton; reads its values from the environment
# at import time and is shared by the rest of dkist-processing-core.
core_configurations = DKISTProcessingCoreConfiguration()
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Define the failure callback functionality."""
|
|
2
|
+
import logging
|
|
3
|
+
from contextlib import contextmanager
|
|
4
|
+
from typing import Callable
|
|
5
|
+
from typing import Type
|
|
6
|
+
|
|
7
|
+
from talus import Binding
|
|
8
|
+
from talus import DurableProducer
|
|
9
|
+
from talus import MessageBodyBase
|
|
10
|
+
from talus import PublishMessageBase
|
|
11
|
+
from talus import Queue
|
|
12
|
+
|
|
13
|
+
from dkist_processing_core.config import core_configurations
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Recipe run failure message Definition
|
|
19
|
+
class RecipeRunFailureMessageBody(MessageBodyBase):
    """Schema for the recipe run failure message body."""

    # Field names are camelCase to match the interservice-bus message contract.
    workflowName: str
    workflowVersion: str
    taskName: str
    # Airflow dag run identifier, when one could be parsed from the context.
    dagRunId: str | None = None
    # URL of the failed task's logs, when one could be parsed from the context.
    logUrl: str | None = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class RecipeRunFailureMessage(PublishMessageBase):
    """Recipe run failure message including the message body and other publication information."""

    # Body schema used to validate and serialize the message payload.
    message_body_cls: Type[RecipeRunFailureMessageBody] = RecipeRunFailureMessageBody
    # Routing key the interservice-bus uses to deliver this message to its queue.
    default_routing_key: str = "recipe.run.failure.m"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@contextmanager
def recipe_run_failure_message_producer_factory() -> DurableProducer:
    """Create message producer for recipe run failure messages.

    Yields
    ------
    DurableProducer
        Producer whose bindings route RecipeRunFailureMessage publications to
        the recipe.run.failure.q queue.
    """
    # Configure the queue the messages should be routed to
    recipe_run_failure_queue = Queue(
        name="recipe.run.failure.q", arguments=core_configurations.isb_queue_arguments
    )
    # Configure the exchange and queue bindings for publishing
    bindings = [Binding(queue=recipe_run_failure_queue, message=RecipeRunFailureMessage)]
    # NOTE: the previous try/finally wrapper ended in a no-op `pass` and so did
    # nothing; DurableProducer's own context manager handles cleanup.
    with DurableProducer(
        queue_bindings=bindings,
        publish_exchange=core_configurations.isb_publish_exchange,
        connection_parameters=core_configurations.isb_producer_connection_parameters,
        connection_retryer=core_configurations.isb_connection_retryer,
    ) as producer:
        yield producer
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def parse_dag_run_id_from_context(context: dict) -> str | None:
|
|
58
|
+
"""Find dag run id."""
|
|
59
|
+
return context.get("run_id", None)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def parse_log_url_from_context(context: dict) -> str | None:
|
|
63
|
+
"""Given an airflow context, find the URL of the logs created by the task."""
|
|
64
|
+
ti = context.get("task_instance", object)
|
|
65
|
+
try:
|
|
66
|
+
return ti.log_url
|
|
67
|
+
except AttributeError:
|
|
68
|
+
pass
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def chat_ops_notification(
    context: dict,
    workflow_name: str,
    workflow_version: str,
    task_name: str,
    producer_factory: Callable[[], DurableProducer] = recipe_run_failure_message_producer_factory,
) -> RecipeRunFailureMessage:
    """Publish message with information regarding a task failure for publication to a chat service.

    Parameters
    ----------
    context
        Airflow callback context; mined for the dag run id and log URL.
    workflow_name
        Name of the workflow containing the failed task.
    workflow_version
        Version of the workflow containing the failed task.
    task_name
        Name of the failed task.
    producer_factory
        Context-manager factory yielding the message producer; injectable for testing.

    Returns
    -------
    The published message, or None if publication raised (see below).
    """
    dag_run_id = parse_dag_run_id_from_context(context)
    log_url = parse_log_url_from_context(context)
    body = RecipeRunFailureMessageBody(
        workflowName=workflow_name,
        workflowVersion=workflow_version,
        taskName=task_name,
        logUrl=log_url,
        dagRunId=dag_run_id,
    )
    message = RecipeRunFailureMessage(body)

    # Broad catch is deliberate: this runs inside a failure callback, and a
    # publishing error must not mask the original task failure. Note the
    # function implicitly returns None in that case despite its annotation.
    try:
        with producer_factory() as producer:
            logger.warning(f"Publishing failure callback message: {message=}")
            producer.publish(message)
            return message
    except Exception as e:  # pragma: no cover
        logger.error(f"Error raised executing failure callback: {e=}")  # pragma: no cover
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""Abstraction layer to construct a workflow node using and airflow operator."""
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Type
|
|
4
|
+
|
|
5
|
+
from airflow.operators.bash import BashOperator
|
|
6
|
+
|
|
7
|
+
from dkist_processing_core.resource_queue import ResourceQueue
|
|
8
|
+
from dkist_processing_core.task import TaskBase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Type aliases used throughout this module: a node wraps a TaskBase subclass,
# and upstreams may be a single task class, a list of them, or None.
task_type_hint = Type[TaskBase]
upstreams_type_hint = list[task_type_hint] | task_type_hint | None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Node:
    """Abstraction to instantiate a Task in a Workflow graph for target execution environments.

    A Node wraps a single TaskBase subclass and renders it either as an airflow
    BashOperator definition (for DAG export) or as a notebook cell (for manual
    processing workers).
    """

    def __init__(
        self,
        workflow_name: str,
        workflow_version: str,
        workflow_package: str,
        task: task_type_hint,
        resource_queue: ResourceQueue,
        upstreams: upstreams_type_hint = None,
        pip_extras: list[str] | None = None,
    ):
        """Node setup.

        Parameters
        ----------
        workflow_name
            Name of the workflow this node belongs to.
        workflow_version
            Workflow version; also used as the pip install version pin.
        workflow_package
            Dotted package name; its first segment (underscores replaced by
            dashes) is the pip distribution installed by the bash script.
        task
            TaskBase subclass this node executes.
        resource_queue
            Resource queue the generated operator targets.
        upstreams
            TaskBase subclass or list of them that must run before this node.
        pip_extras
            Optional pip extras to install with the workflow package.

        Raises
        ------
        TypeError
            If task or any upstream is not a TaskBase subclass.
        """
        # Task type checking
        upstreams = upstreams or []
        # Normalize a single upstream task class into a one-element list.
        if not isinstance(upstreams, Iterable):
            upstreams = [
                upstreams,
            ]
        if not all([issubclass(t, TaskBase) for t in [task] + upstreams]):
            raise TypeError(
                "Only task classes inheriting from "
                "dkist_processing_core.TaskBase can be added to a workflow"
            )

        self.workflow_name = workflow_name
        self.workflow_version = workflow_version
        self.task = task
        self.workflow_package = workflow_package
        self.upstreams = upstreams
        self.resource_queue = resource_queue
        self.pip_extras = pip_extras

    @property
    def operator(self) -> BashOperator:
        """Native engine node."""
        # These imports look unused but must be in scope for the eval below:
        # timedelta, partial and chat_ops_notification all appear in the
        # generated operator_definition string.
        from datetime import timedelta
        from dkist_processing_core.failure_callback import chat_ops_notification
        from functools import partial

        # eval of an internally generated, trusted string (no external input).
        return eval(self.operator_definition)

    @property
    def notebook_cell(self) -> str:
        """Render the node as python code for a notebook cell."""
        # The cell expects a `recipe_run_id` variable to already exist in the
        # notebook namespace; manual-mode toggles are left as comments.
        lines = [
            f"from {self.task.__module__} import {self.task.__name__}",
            f"with {self.task.__name__}(recipe_run_id=recipe_run_id, workflow_name='{self.workflow_name}', workflow_version='{self.workflow_version}') as t:\n    #t.is_task_manual = True\n    t()\n    #t.rollback()",
        ]
        return "\n".join(lines)

    @property
    def operator_definition(self) -> str:
        """Airflow style command to define a bash operator."""
        # Consumed by `operator` via eval; the names referenced here must match
        # the local imports made there.
        return f"""BashOperator(
task_id='{self.task.__name__}',
bash_command='''{self.bash_script}''',
retries={self.task.retries},
retry_delay=timedelta(seconds={self.task.retry_delay_seconds}),
on_failure_callback=partial(
chat_ops_notification,
workflow_name='{self.workflow_name}',
workflow_version='{self.workflow_version}',
task_name='{self.task.__name__}'
),
owner="DKIST Data Center",
queue="{self.resource_queue.value}",
output_processor=str,
)
"""

    @property
    def dependencies(self) -> list[tuple[str, str]]:
        """List of upstream, downstream task name tuples."""
        return [(upstream.__name__, self.task.__name__) for upstream in self.upstreams]

    @property
    def bash_script(self) -> str:
        """Format bash script for the BashOperator."""
        # Install the workflow package, then run the task's python snippet.
        command = f"""{self.install_command}
{self.run_command}"""
        return self.bash_template(command)

    @staticmethod
    def bash_template(command: str) -> str:
        """Return the bash script with a template wrapped command."""
        # Wraps the command in worker diagnostics plus a throwaway virtualenv;
        # the venv is removed afterwards and the command's exit code preserved.
        return f"""#!/bin/bash
echo Working Directory
pwd
echo Worker Identification
echo NOMAD_ALLOC_ID
echo $NOMAD_ALLOC_ID
echo NOMAD_GROUP_NAME
echo $NOMAD_GROUP_NAME
echo NOMAD_HOST_ADDR_worker
echo $NOMAD_HOST_ADDR_worker
echo NOMAD_ALLOC_NAME
echo $NOMAD_ALLOC_NAME
echo Host Python Environment i.e. system-site-packages
python3 -m pip install --upgrade --user pip
pip list
echo Creating Virtual Environment
python3 -m venv --system-site-packages .task_venv
echo Activate Environment
. .task_venv/bin/activate
echo Python Interpreter Location
which python
echo Run Main Command
{command}
export exit_code=$?
echo Deactivate Environment
deactivate
echo Remove Virtual Environment
rm -rf .task_venv
echo Exit with code from main command: $exit_code
exit $exit_code"""

    @property
    def formatted_pip_extras(self) -> str:
        """Format pip extras for the installation command."""
        # Renders e.g. ['a', 'b'] as '[a,b]' (quoted for the shell); empty
        # string when no extras were requested.
        if self.pip_extras:
            extra_requirements = ",".join(self.pip_extras)
            return f"'[{extra_requirements}]'"
        return ""

    @property
    def install_command(self) -> str:
        """Format the installation command for the bash script."""
        # First segment of the dotted package path, underscores to dashes,
        # is assumed to be the pip distribution name.
        repo_name = self.workflow_package.split(".")[0].replace("_", "-")
        version = self.workflow_version
        extras = self.formatted_pip_extras
        return f"""python -m pip install --upgrade pip
python -m pip install {repo_name}{extras}=={version}"""

    @property
    def run_command(self) -> str:
        """Return the python bash command to execute the task."""
        return f'python -c "{self.python}"'

    @property
    def python(self) -> str:
        """Return the python code to execute the task."""
        # The escaped braces render as {{dag_run.conf['recipe_run_id']}} so that
        # airflow's template engine substitutes the recipe run id at run time.
        return f"""from {self.task.__module__} import {self.task.__name__}
with {self.task.__name__}(recipe_run_id={{{{dag_run.conf['recipe_run_id']}}}}, workflow_name='{self.workflow_name}', workflow_version='{self.workflow_version}') as task:
    task()
"""

    def __repr__(self):
        """Render node instantiation as a string."""
        return f"Node(workflow_name={self.workflow_name}, workflow_version={self.workflow_version}, workflow_package={self.workflow_package}, task={self.task!r}, upstreams={self.upstreams}, queue={self.resource_queue!r})"

    def __str__(self):
        """Render node instance as a string."""
        return repr(self)
|