dkist-processing-core 4.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. changelog/.gitempty +0 -0
  2. dkist_processing_core/__init__.py +13 -0
  3. dkist_processing_core/build_utils.py +139 -0
  4. dkist_processing_core/config.py +82 -0
  5. dkist_processing_core/failure_callback.py +96 -0
  6. dkist_processing_core/node.py +169 -0
  7. dkist_processing_core/resource_queue.py +9 -0
  8. dkist_processing_core/task.py +250 -0
  9. dkist_processing_core/tests/__init__.py +1 -0
  10. dkist_processing_core/tests/conftest.py +172 -0
  11. dkist_processing_core/tests/invalid_workflow_cyclic/__init__.py +1 -0
  12. dkist_processing_core/tests/invalid_workflow_cyclic/workflow.py +21 -0
  13. dkist_processing_core/tests/invalid_workflow_for_docker_multi_category/__init__.py +0 -0
  14. dkist_processing_core/tests/invalid_workflow_for_docker_multi_category/workflow.py +21 -0
  15. dkist_processing_core/tests/task_example.py +45 -0
  16. dkist_processing_core/tests/test_build_utils.py +128 -0
  17. dkist_processing_core/tests/test_export.py +71 -0
  18. dkist_processing_core/tests/test_failure_callback.py +90 -0
  19. dkist_processing_core/tests/test_node.py +156 -0
  20. dkist_processing_core/tests/test_task.py +82 -0
  21. dkist_processing_core/tests/test_workflow.py +212 -0
  22. dkist_processing_core/tests/valid_workflow_package/__init__.py +1 -0
  23. dkist_processing_core/tests/valid_workflow_package/workflow.py +21 -0
  24. dkist_processing_core/tests/zero_node_workflow_package/__init__.py +1 -0
  25. dkist_processing_core/tests/zero_node_workflow_package/workflow.py +9 -0
  26. dkist_processing_core/workflow.py +294 -0
  27. dkist_processing_core-4.3.0.dist-info/METADATA +249 -0
  28. dkist_processing_core-4.3.0.dist-info/RECORD +41 -0
  29. dkist_processing_core-4.3.0.dist-info/WHEEL +5 -0
  30. dkist_processing_core-4.3.0.dist-info/top_level.txt +4 -0
  31. docs/Makefile +134 -0
  32. docs/auto-proc-concept-model.png +0 -0
  33. docs/auto_proc_brick.png +0 -0
  34. docs/automated-processing-deployed.png +0 -0
  35. docs/changelog.rst +6 -0
  36. docs/conf.py +50 -0
  37. docs/index.rst +9 -0
  38. docs/landing_page.rst +34 -0
  39. docs/make.bat +170 -0
  40. docs/requirements.txt +1 -0
  41. licenses/LICENSE.rst +11 -0
changelog/.gitempty ADDED
File without changes
@@ -0,0 +1,13 @@
1
"""Package-level setup information."""
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version

from dkist_processing_core.resource_queue import ResourceQueue
from dkist_processing_core.task import TaskBase
from dkist_processing_core.workflow import Workflow

try:
    # Resolve the installed distribution's version at import time.
    __version__ = version(__name__)
except PackageNotFoundError:
    # Package is not installed (e.g. running from a source checkout).
    __version__ = "unknown"
@@ -0,0 +1,139 @@
1
+ """Utilities for the build pipeline."""
2
+ import importlib
3
+ from pathlib import Path
4
+ from shutil import rmtree
5
+ from types import ModuleType
6
+
7
+ from dkist_processing_core import Workflow
8
+
9
+
10
# Public build-pipeline API exported by this module.
__all__ = ["validate_workflows", "export_dags", "export_notebook_dockerfile", "export_notebooks"]
11
+
12
+
13
def validate_workflows(workflow_package: ModuleType, export_path: Path | None = None) -> None:
    """Validate that workflow engine (airflow) objects are acyclic and that exported workflows compile."""
    # Remove the export directory afterwards only when the caller did not supply one.
    cleanup_after = not export_path
    if export_path is None:
        export_path = Path("export/")
    discovered_workflows = extract_workflows_from_package(workflow_package)
    try:
        _validate_workflows(discovered_workflows, export_path)
    finally:
        if cleanup_after:
            rmtree(export_path)
25
+
26
+
27
def _validate_workflows(workflows: list[Workflow], export_path: Path) -> None:
    """Validate workflows by ensuring their exported version compiles as python and that there is at least one node."""
    for workflow in workflows:
        exported_dag = workflow.export_dag(path=export_path)
        # Compiling the exported file proves the generated DAG text is valid python.
        dag_source = exported_dag.read_text()
        compile(dag_source, filename=f"{exported_dag.stem}.pyc", mode="exec")
        if not workflow.nodes:
            raise ValueError(f"Workflow {workflow.workflow_name} has 0 nodes.")
35
+
36
+
37
def export_dags(workflow_package: ModuleType, path: str | Path) -> list[Path]:
    """Export Airflow DAG files."""
    workflows = extract_workflows_from_package(workflow_package)
    return [workflow.export_dag(path=path) for workflow in workflows]
40
+
41
+
42
def export_notebooks(workflow_package: ModuleType, path: str | Path) -> list[Path]:
    """Export Jupyter Notebook files."""
    workflows = extract_workflows_from_package(workflow_package)
    return [workflow.export_notebook(path=path) for workflow in workflows]
45
+
46
+
47
def export_notebook_dockerfile(workflow_package: ModuleType, path: str | Path) -> Path:
    """Export a dockerfile to containerize notebooks."""
    export_target = Path(path)
    notebook_paths = export_notebooks(workflow_package=workflow_package, path=export_target)
    category = extract_category_from_workflows(workflow_package=workflow_package)
    dockerfile = NotebookDockerfile(notebook_paths=notebook_paths, category=category)
    # Written to the current directory; touch(exist_ok=False) refuses to clobber
    # an existing Dockerfile.
    dockerfile_path = Path("Dockerfile")
    dockerfile_path.touch(exist_ok=False)
    dockerfile_path.write_text(dockerfile.contents)
    return dockerfile_path
58
+
59
+
60
def extract_category_from_workflows(workflow_package: ModuleType) -> str:
    """Extract the category from the workflows in the package to provide a unique category for the dockerfile."""
    categories = {
        workflow.category for workflow in extract_workflows_from_package(workflow_package)
    }
    # A dockerfile can only be namespaced by a single category.
    if len(categories) > 1:
        raise ValueError(
            f"Multiple categories found in provided workflows. Categories found: {categories}"
        )
    return categories.pop()
69
+
70
+
71
def extract_workflows_from_package(workflow_package: ModuleType) -> list[Workflow]:
    """Extract all the Workflow objects from a package."""
    return extract_objects_from_package_by_type(workflow_package, object_type=Workflow)
74
+
75
+
76
def extract_objects_from_package_by_type(package: ModuleType, object_type: type) -> list:
    """Extract all objects in public modules of a given type from a package."""
    found = []
    for module_name in parse_unprotected_modules_names_from_package(package):
        # Relative import anchored at the package so nested modules resolve.
        imported = importlib.import_module(f".{module_name}", package.__name__)
        found.extend(obj for obj in vars(imported).values() if isinstance(obj, object_type))
    return found
84
+
85
+
86
def parse_unprotected_modules_names_from_package(package: ModuleType) -> list[str]:
    """Parse the names of all modules in a package that are not private i.e. don't begin with an underscore."""
    # glob pattern [!_]*.py excludes any file whose name starts with an underscore.
    package_dir = Path(package.__path__[0])
    return [module_file.stem for module_file in package_dir.glob("[!_]*.py")]
90
+
91
+
92
class NotebookDockerfile:
    """Build a Dockerfile for deployment as a Manual Processing Worker."""

    def __init__(self, notebook_paths: list[Path], category: str):
        """
        Store notebook locations and category, then enforce the relative-path contract.

        Parameters
        ----------
        notebook_paths
            Paths to the exported notebooks.  Must be relative so the generated
            COPY instructions resolve inside the docker build context.
        category
            Workflow category used to namespace the notebook server base URL.
        """
        self.notebook_paths = notebook_paths
        self.validate_notebook_paths_are_relative()
        self.category = category

    def validate_notebook_paths_are_relative(self) -> None:
        """Raise ValueError if any notebook path is absolute.

        Bug fix: this previously returned a bool which __init__ ignored, so the
        validation never had any effect and absolute paths silently produced
        broken COPY instructions.
        """
        absolute_paths = [p for p in self.notebook_paths if p.is_absolute()]
        if absolute_paths:
            raise ValueError(
                f"Notebook paths must be relative to the docker build context. Absolute paths found: {absolute_paths}"
            )

    @property
    def contents(self) -> str:
        """Return the Dockerfile body."""
        return "\n".join(self.preamble + self.setup + self.notebooks + self.command)

    @property
    def preamble(self) -> list[str]:
        """Dockerfile preamble lines."""
        return ["FROM python:3.11", "ENV LANG=C.UTF-8"]

    @property
    def setup(self) -> list[str]:
        """Environment setup lines."""
        return [
            "COPY . /app",
            "WORKDIR /app",
            "RUN python -m pip install -U pip",
            "RUN pip install notebook",
            # Pin the notebook version so the project install below cannot change it.
            "RUN pip freeze | grep notebook= > constraints.txt",
            "RUN cat constraints.txt",
            "RUN python -m pip install -c constraints.txt .",
        ]

    @property
    def notebooks(self) -> list[str]:
        """Generate workflow notebooks and include in Docker container."""
        return [f"COPY {notebook_path} /notebooks/" for notebook_path in self.notebook_paths]

    @property
    def command(self) -> list[str]:
        """Run notebook server on deployment."""
        port = 8888
        return [
            f"EXPOSE {port}",
            f"CMD jupyter notebook --NotebookApp.allow_root=True --NotebookApp.base_url='/mpw-{self.category}/' --NotebookApp.ip='0.0.0.0' --NotebookApp.port={port} --MappingKernelManager.cull_idle_timeout=300 --notebook-dir=/notebooks --allow-root",
        ]
@@ -0,0 +1,82 @@
1
+ """Environment controlled configurations for dkist_processing_core."""
2
+ from dkist_service_configuration import MeshServiceConfigurationBase
3
+ from dkist_service_configuration.settings import MeshService
4
+ from pydantic import Field
5
+ from talus import ConnectionRetryerFactory
6
+ from talus import Exchange
7
+ from talus.models.connection_parameters import ConnectionParameterFactory
8
+
9
+
10
class DKISTProcessingCoreConfiguration(MeshServiceConfigurationBase):
    """Environment configurations for dkist_processing_core."""

    # Interservice-bus (RabbitMQ) credentials; defaults are the RabbitMQ guest account.
    isb_username: str = Field(default="guest")
    isb_password: str = Field(default="guest")
    # Exchange failure messages are published to.
    isb_exchange: str = Field(default="master.direct.x")
    # RabbitMQ queue type, forwarded as the x-queue-type queue argument.
    isb_queue_type: str = Field(default="classic")
    # Elastic APM reporting settings.
    elastic_apm_service_name: str = Field(default="dkist-processing-core")
    elastic_apm_other_options: dict = Field(default_factory=dict)
    elastic_apm_enabled: bool = False
    # Version stamp injected by the build pipeline; "dev" outside of builds.
    build_version: str = Field(default="dev")

    @property
    def isb_mesh_service(self) -> MeshService:
        """Return the mesh service details for the interservice-bus."""
        # Falls back to a local RabbitMQ on the default AMQP port when the
        # service mesh does not know about the interservice-bus.
        return self.service_mesh_detail(
            service_name="interservice-bus",
            default_mesh_service=MeshService(mesh_address="localhost", mesh_port=5672),
        )

    @property
    def isb_producer_connection_parameters(self) -> ConnectionParameterFactory:
        """Return the connection parameters for the ISB producer."""
        return ConnectionParameterFactory(
            rabbitmq_host=self.isb_mesh_service.host,
            rabbitmq_port=self.isb_mesh_service.port,
            rabbitmq_user=self.isb_username,
            rabbitmq_pass=self.isb_password,
            connection_name="dkist-processing-core-producer",
        )

    @property
    def isb_connection_retryer(self) -> ConnectionRetryerFactory:
        """Return the connection retryer for the ISB connection."""
        # Up to 3 attempts with 1-5s delay plus 1-3s jitter between them.
        return ConnectionRetryerFactory(
            delay_min=1,
            delay_max=5,
            backoff=1,
            jitter_min=1,
            jitter_max=3,
            attempts=3,
        )

    @property
    def isb_queue_arguments(self) -> dict:
        """Return the queue arguments for the ISB."""
        return {
            "x-queue-type": self.isb_queue_type,
        }

    @property
    def isb_publish_exchange(self) -> Exchange:
        """Return the exchange for the ISB."""
        return Exchange(name=self.isb_exchange)

    @property
    def elastic_apm_server_url(self) -> str:
        """Return the URL for the Elastic APM server."""
        apm_server = self.service_mesh_detail(service_name="system-monitoring-log-apm")
        return f"http://{apm_server.host}:{apm_server.port}/"

    @property
    def apm_config(self) -> dict:
        """Return the configuration for the Elastic APM."""
        # elastic_apm_other_options is spread last so it can override the defaults.
        return {
            "SERVICE_NAME": self.elastic_apm_service_name,
            "SERVER_URL": self.elastic_apm_server_url,
            "ENVIRONMENT": "Workflows",
            **self.elastic_apm_other_options,
        }
80
+
81
+
82
# Singleton configuration instance shared across the package.
core_configurations = DKISTProcessingCoreConfiguration()
@@ -0,0 +1,96 @@
1
+ """Define the failure callback functionality."""
2
+ import logging
3
+ from contextlib import contextmanager
4
+ from typing import Callable
5
+ from typing import Type
6
+
7
+ from talus import Binding
8
+ from talus import DurableProducer
9
+ from talus import MessageBodyBase
10
+ from talus import PublishMessageBase
11
+ from talus import Queue
12
+
13
+ from dkist_processing_core.config import core_configurations
14
+
15
# Module-level logger for failure-callback publication diagnostics.
logger = logging.getLogger(__name__)
16
+
17
+
18
+ # Recipe run failure message Definition
19
+ class RecipeRunFailureMessageBody(MessageBodyBase):
20
+ """Schema for the recipe run failure message body."""
21
+
22
+ workflowName: str
23
+ workflowVersion: str
24
+ taskName: str
25
+ dagRunId: str | None = None
26
+ logUrl: str | None = None
27
+
28
+
29
class RecipeRunFailureMessage(PublishMessageBase):
    """Recipe run failure message including the message body and other publication information."""

    # Body schema used to (de)serialize the message payload.
    message_body_cls: Type[RecipeRunFailureMessageBody] = RecipeRunFailureMessageBody
    # Routing key the interservice-bus binding uses for failure messages.
    default_routing_key: str = "recipe.run.failure.m"
34
+
35
+
36
@contextmanager
def recipe_run_failure_message_producer_factory() -> DurableProducer:
    """Create message producer for recipe run failure messages.

    Yields a DurableProducer configured to route RecipeRunFailureMessages to the
    recipe run failure queue on the interservice bus.
    """
    # Configure the queue the messages should be routed to
    recipe_run_failure_queue = Queue(
        name="recipe.run.failure.q", arguments=core_configurations.isb_queue_arguments
    )
    # Configure the exchange and queue bindings for publishing
    bindings = [Binding(queue=recipe_run_failure_queue, message=RecipeRunFailureMessage)]
    # NOTE(review): the original wrapped this in ``try: ... finally: pass`` — a
    # no-op; DurableProducer's own context manager handles cleanup, so the dead
    # wrapper was removed with no behavior change.
    with DurableProducer(
        queue_bindings=bindings,
        publish_exchange=core_configurations.isb_publish_exchange,
        connection_parameters=core_configurations.isb_producer_connection_parameters,
        connection_retryer=core_configurations.isb_connection_retryer,
    ) as producer:
        yield producer
55
+
56
+
57
+ def parse_dag_run_id_from_context(context: dict) -> str | None:
58
+ """Find dag run id."""
59
+ return context.get("run_id", None)
60
+
61
+
62
+ def parse_log_url_from_context(context: dict) -> str | None:
63
+ """Given an airflow context, find the URL of the logs created by the task."""
64
+ ti = context.get("task_instance", object)
65
+ try:
66
+ return ti.log_url
67
+ except AttributeError:
68
+ pass
69
+
70
+
71
def chat_ops_notification(
    context: dict,
    workflow_name: str,
    workflow_version: str,
    task_name: str,
    producer_factory: Callable[[], DurableProducer] = recipe_run_failure_message_producer_factory,
) -> RecipeRunFailureMessage:
    """Publish message with information regarding a task failure for publication to a chat service.

    Returns the published message, or None if publication failed (the failure
    path is best-effort and only logs).
    """
    message = RecipeRunFailureMessage(
        RecipeRunFailureMessageBody(
            workflowName=workflow_name,
            workflowVersion=workflow_version,
            taskName=task_name,
            logUrl=parse_log_url_from_context(context),
            dagRunId=parse_dag_run_id_from_context(context),
        )
    )
    try:
        with producer_factory() as producer:
            logger.warning(f"Publishing failure callback message: {message=}")
            producer.publish(message)
            return message
    except Exception as e:  # pragma: no cover
        logger.error(f"Error raised executing failure callback: {e=}")  # pragma: no cover
@@ -0,0 +1,169 @@
1
+ """Abstraction layer to construct a workflow node using and airflow operator."""
2
+ from collections.abc import Iterable
3
+ from typing import Type
4
+
5
+ from airflow.operators.bash import BashOperator
6
+
7
+ from dkist_processing_core.resource_queue import ResourceQueue
8
+ from dkist_processing_core.task import TaskBase
9
+
10
+
11
# Type aliases describing what a workflow node accepts: a TaskBase subclass for
# its task, and None / one class / a list of classes for its upstreams.
task_type_hint = Type[TaskBase]
upstreams_type_hint = list[task_type_hint] | task_type_hint | None
13
+
14
+
15
class Node:
    """Abstraction to instantiate a Task in a Workflow graph for target execution environments."""

    def __init__(
        self,
        workflow_name: str,
        workflow_version: str,
        workflow_package: str,
        task: task_type_hint,
        resource_queue: ResourceQueue,
        upstreams: upstreams_type_hint = None,
        pip_extras: list[str] | None = None,
    ):
        """Node setup."""
        # Task type checking
        # Normalize upstreams: None -> [], a bare task class -> single-element list.
        upstreams = upstreams or []
        if not isinstance(upstreams, Iterable):
            upstreams = [
                upstreams,
            ]
        if not all([issubclass(t, TaskBase) for t in [task] + upstreams]):
            raise TypeError(
                "Only task classes inheriting from "
                "dkist_processing_core.TaskBase can be added to a workflow"
            )

        self.workflow_name = workflow_name
        self.workflow_version = workflow_version
        self.task = task
        self.workflow_package = workflow_package
        self.upstreams = upstreams
        self.resource_queue = resource_queue
        self.pip_extras = pip_extras

    @property
    def operator(self) -> BashOperator:
        """Native engine node."""
        # These imports look unused but are required: the eval'd
        # operator_definition text references timedelta, partial and
        # chat_ops_notification by name.
        from datetime import timedelta
        from dkist_processing_core.failure_callback import chat_ops_notification
        from functools import partial

        # eval of locally generated text (never user input); evaluating the same
        # source text that is exported to DAG files keeps both paths identical.
        return eval(self.operator_definition)

    @property
    def notebook_cell(self) -> str:
        """Render the node as python code for a notebook cell."""
        lines = [
            f"from {self.task.__module__} import {self.task.__name__}",
            f"with {self.task.__name__}(recipe_run_id=recipe_run_id, workflow_name='{self.workflow_name}', workflow_version='{self.workflow_version}') as t:\n #t.is_task_manual = True\n t()\n #t.rollback()",
        ]
        return "\n".join(lines)

    @property
    def operator_definition(self) -> str:
        """Airflow style command to define a bash operator."""
        # NOTE: this text is written into exported DAG files AND eval'd by the
        # `operator` property above, so it must remain valid python source.
        return f"""BashOperator(
    task_id='{self.task.__name__}',
    bash_command='''{self.bash_script}''',
    retries={self.task.retries},
    retry_delay=timedelta(seconds={self.task.retry_delay_seconds}),
    on_failure_callback=partial(
        chat_ops_notification,
        workflow_name='{self.workflow_name}',
        workflow_version='{self.workflow_version}',
        task_name='{self.task.__name__}'
    ),
    owner="DKIST Data Center",
    queue="{self.resource_queue.value}",
    output_processor=str,
)
"""

    @property
    def dependencies(self) -> list[tuple[str, str]]:
        """List of upstream, downstream task name tuples."""
        return [(upstream.__name__, self.task.__name__) for upstream in self.upstreams]

    @property
    def bash_script(self) -> str:
        """Format bash script for the BashOperator."""
        command = f"""{self.install_command}
{self.run_command}"""
        return self.bash_template(command)

    @staticmethod
    def bash_template(command: str) -> str:
        """Return the bash script with a template wrapped command."""
        # The wrapper logs worker identity, builds a throw-away virtualenv for
        # the task, runs the command, then removes the venv while preserving the
        # command's exit code.
        return f"""#!/bin/bash
echo Working Directory
pwd
echo Worker Identification
echo NOMAD_ALLOC_ID
echo $NOMAD_ALLOC_ID
echo NOMAD_GROUP_NAME
echo $NOMAD_GROUP_NAME
echo NOMAD_HOST_ADDR_worker
echo $NOMAD_HOST_ADDR_worker
echo NOMAD_ALLOC_NAME
echo $NOMAD_ALLOC_NAME
echo Host Python Environment i.e. system-site-packages
python3 -m pip install --upgrade --user pip
pip list
echo Creating Virtual Environment
python3 -m venv --system-site-packages .task_venv
echo Activate Environment
. .task_venv/bin/activate
echo Python Interpreter Location
which python
echo Run Main Command
{command}
export exit_code=$?
echo Deactivate Environment
deactivate
echo Remove Virtual Environment
rm -rf .task_venv
echo Exit with code from main command: $exit_code
exit $exit_code"""

    @property
    def formatted_pip_extras(self) -> str:
        """Format pip extras for the installation command."""
        if self.pip_extras:
            extra_requirements = ",".join(self.pip_extras)
            return f"'[{extra_requirements}]'"
        return ""

    @property
    def install_command(self) -> str:
        """Format the installation command for the bash script."""
        # e.g. workflow_package "dkist_processing_x.workflows" -> repo "dkist-processing-x"
        repo_name = self.workflow_package.split(".")[0].replace("_", "-")
        version = self.workflow_version
        extras = self.formatted_pip_extras
        return f"""python -m pip install --upgrade pip
python -m pip install {repo_name}{extras}=={version}"""

    @property
    def run_command(self) -> str:
        """Return the python bash command to execute the task."""
        return f'python -c "{self.python}"'

    @property
    def python(self) -> str:
        """Return the python code to execute the task."""
        # {{{{...}}}} renders as {{...}} in the exported text, which airflow
        # then templates with the dag_run conf at runtime.
        return f"""from {self.task.__module__} import {self.task.__name__}
with {self.task.__name__}(recipe_run_id={{{{dag_run.conf['recipe_run_id']}}}}, workflow_name='{self.workflow_name}', workflow_version='{self.workflow_version}') as task:
    task()
"""

    def __repr__(self):
        """Render node instantiation as a string."""
        return f"Node(workflow_name={self.workflow_name}, workflow_version={self.workflow_version}, workflow_package={self.workflow_package}, task={self.task!r}, upstreams={self.upstreams}, queue={self.resource_queue!r})"

    def __str__(self):
        """Render node instance as a string."""
        return repr(self)
@@ -0,0 +1,9 @@
1
+ """Resource queue names for Workflow Node specification which specify different resource needs."""
2
+ from enum import StrEnum
3
+
4
+
5
+ class ResourceQueue(StrEnum):
6
+ """Supported queue names."""
7
+
8
+ DEFAULT: str = "default"
9
+ HIGH_MEMORY: str = "high_memory"