airflow-unicore-integration 0.1.5__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airflow_unicore_integration-0.1.5/src/airflow_unicore_integration.egg-info → airflow_unicore_integration-0.2.0}/PKG-INFO +27 -2
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/README.rst +26 -1
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/pyproject.toml +1 -1
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/__init__.py +3 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/executors/unicore_executor.py +22 -17
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/hooks/unicore_hooks.py +42 -2
- airflow_unicore_integration-0.2.0/src/airflow_unicore_integration/operators/container.py +22 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/operators/unicore_operators.py +4 -2
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/util/job.py +97 -23
- airflow_unicore_integration-0.2.0/src/airflow_unicore_integration/util/launch_script_content.py +90 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0/src/airflow_unicore_integration.egg-info}/PKG-INFO +27 -2
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration.egg-info/SOURCES.txt +3 -1
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/LICENSE +0 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/setup.cfg +0 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/executors/__init__.py +0 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/executors/run_task_via_supervisor.py +0 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/hooks/__init__.py +0 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/operators/__init__.py +0 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/policies/__init__.py +0 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration.egg-info/dependency_links.txt +0 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration.egg-info/entry_points.txt +0 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration.egg-info/requires.txt +0 -0
- {airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration.egg-info/top_level.txt +0 -0
{airflow_unicore_integration-0.1.5/src/airflow_unicore_integration.egg-info → airflow_unicore_integration-0.2.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: airflow-unicore-integration
-Version: 0.1.5
+Version: 0.2.0
 Summary: Running Unicore Jobs from airflow DAGs.
 Author-email: Christian Böttcher <c.boettcher@fz-juelich.de>
 License-Expression: BSD-3-Clause
@@ -24,7 +24,7 @@ Unicore Airflow Integration
 
 |Generic badge|
 
-.. |Generic badge| image:: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml/badge.svg
+.. |Generic badge| image:: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml/badge.svg
    :target: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml
 
 This project integrates `UNICORE <https://github.com/UNICORE-EU>`_ and `Apache Airflow <https://airflow.apache.org/>`_.
@@ -34,6 +34,31 @@ Airflow is a platform to programmatically author, schedule and monitor workflows
 In the current state, this project provides a set of airflow `operators <https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/operators.html>`_, which can be used as part of airflow workflows to submit jobs to Unicore.
 The UnicoreExecutor only offers experimental support for airflow 3 so far. Further support is currently being worked on.
 
+-------------------------
+Using the UnicoreExecutor
+-------------------------
+
+To use the UnicoreExecutor, this library needs to be installed in your airflow environment and then some configuration work needs to be done.
+
+How to configure these settings is up to your deployment, as it uses the standard airflow configuration mechanism.
+In case of a helm deployment via the official helm chart, you will need to use environment variables, as the unicore-related options are not present in the chart and would cause schema validation to fail.
+
+All options fall under the [unicore.executor] section in airflow.cfg, or use the ``AIRFLOW__UNICORE_EXECUTOR__`` prefix when set as environment variables.
+
+========================= ============================================ ===========================================================================================
+Option name               default                                      description
+========================= ============================================ ===========================================================================================
+EXECUTION_API_SERVER_URL  <The default from the airflow config>        The url to reach the airflow API server from the execution environment (e.g. compute nodes)
+AUTH_TOKEN                mandatory                                    The unicore auth token to use for job submission
+DEFAULT_URL               http://localhost:8080/DEMO-SITE/rest/core    The default unicore site to submit jobs to
+DEFAULT_ENV               mandatory                                    The default activation script for a functional airflow environment on the execution machine
+TMP_DIR                   /tmp                                         A temporary directory to store data such as GitDagBundles
+========================= ============================================ ===========================================================================================
+
+The default env is loaded via ``. default_env.sh`` and must enable an environment in which python is available in a suitable version and the ``apache-airflow-task-sdk`` and ``apache-airflow-providers-git`` packages are installed. All other dependencies depend on the dags to be run, but must already be included in the environment.
+
+A simple solution for this may be the "activate" script of a python venv. If the target system requires additional commands to enable python (e.g. ``module load``), these may be added to the top of the activate script.
+
 ---------------------------
 Using the Unicore Operators
 ---------------------------
{airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/README.rst

@@ -5,7 +5,7 @@ Unicore Airflow Integration
 
 |Generic badge|
 
-.. |Generic badge| image:: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml/badge.svg
+.. |Generic badge| image:: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml/badge.svg
    :target: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml
 
 This project integrates `UNICORE <https://github.com/UNICORE-EU>`_ and `Apache Airflow <https://airflow.apache.org/>`_.
@@ -15,6 +15,31 @@ Airflow is a platform to programmatically author, schedule and monitor workflows
 In the current state, this project provides a set of airflow `operators <https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/operators.html>`_, which can be used as part of airflow workflows to submit jobs to Unicore.
 The UnicoreExecutor only offers experimental support for airflow 3 so far. Further support is currently being worked on.
 
+-------------------------
+Using the UnicoreExecutor
+-------------------------
+
+To use the UnicoreExecutor, this library needs to be installed in your airflow environment and then some configuration work needs to be done.
+
+How to configure these settings is up to your deployment, as it uses the standard airflow configuration mechanism.
+In case of a helm deployment via the official helm chart, you will need to use environment variables, as the unicore-related options are not present in the chart and would cause schema validation to fail.
+
+All options fall under the [unicore.executor] section in airflow.cfg, or use the ``AIRFLOW__UNICORE_EXECUTOR__`` prefix when set as environment variables.
+
+========================= ============================================ ===========================================================================================
+Option name               default                                      description
+========================= ============================================ ===========================================================================================
+EXECUTION_API_SERVER_URL  <The default from the airflow config>        The url to reach the airflow API server from the execution environment (e.g. compute nodes)
+AUTH_TOKEN                mandatory                                    The unicore auth token to use for job submission
+DEFAULT_URL               http://localhost:8080/DEMO-SITE/rest/core    The default unicore site to submit jobs to
+DEFAULT_ENV               mandatory                                    The default activation script for a functional airflow environment on the execution machine
+TMP_DIR                   /tmp                                         A temporary directory to store data such as GitDagBundles
+========================= ============================================ ===========================================================================================
+
+The default env is loaded via ``. default_env.sh`` and must enable an environment in which python is available in a suitable version and the ``apache-airflow-task-sdk`` and ``apache-airflow-providers-git`` packages are installed. All other dependencies depend on the dags to be run, but must already be included in the environment.
+
+A simple solution for this may be the "activate" script of a python venv. If the target system requires additional commands to enable python (e.g. ``module load``), these may be added to the top of the activate script.
+
 ---------------------------
 Using the Unicore Operators
 ---------------------------
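The executor options above can also be sketched as plain environment variables, which is the route the helm-chart note recommends. The following Python snippet is an illustration only; every concrete value (site URL, token, venv path, API server URL) is a placeholder, not a default shipped with the package:

    # Hedged sketch: exporting the [unicore.executor] options as environment variables,
    # e.g. for a container or helm deployment where airflow.cfg cannot be edited directly.
    import os

    os.environ.update(
        {
            # assumed executor class path, matching this package's module layout
            "AIRFLOW__CORE__EXECUTOR": "airflow_unicore_integration.executors.unicore_executor.UnicoreExecutor",
            "AIRFLOW__UNICORE_EXECUTOR__AUTH_TOKEN": "<unicore-auth-token>",
            "AIRFLOW__UNICORE_EXECUTOR__DEFAULT_URL": "https://hpc.example.org:8080/SITE/rest/core",
            "AIRFLOW__UNICORE_EXECUTOR__DEFAULT_ENV": "/path/to/airflow-venv/bin/activate",
            "AIRFLOW__UNICORE_EXECUTOR__EXECUTION_API_SERVER_URL": "https://airflow.example.org/execution/",
            "AIRFLOW__UNICORE_EXECUTOR__TMP_DIR": "/tmp/airflow-unicore",
        }
    )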
{airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/executors/unicore_executor.py

@@ -19,8 +19,8 @@ from airflow.executors.workloads import All
 from airflow.executors.workloads import ExecuteTask
 from airflow.models.taskinstancekey import TaskInstanceKey
 from airflow.utils.state import TaskInstanceState
-
-from
+from pyunicore import client
+from pyunicore.credentials import create_credential
 
 from ..util.job import JobDescriptionGenerator
 from ..util.job import NaiveJobDescriptionGenerator
@@ -39,9 +39,14 @@ STATE_MAPPINGS: Dict[uc_client.JobStatus, TaskInstanceState] = {
 
 class UnicoreExecutor(BaseExecutor):
 
+    EXECUTOR_CONFIG_UNICORE_CONN_KEY = (
+        "unicore_connection_id"  # alternative connection id for the Unicore connection to use
+    )
+    EXECUTOR_CONFIG_UNICORE_SITE_KEY = "unicore_site"  # alternative Unicore site to run at, only required if different than connection default
+    EXECUTOR_CONFIG_UNICORE_CREDENTIAL_KEY = "unicore_credential"  # alternative unicore credential to use for the job, only required if different than connection default
+
     def start(self):
         self.active_jobs: Dict[TaskInstanceKey, uc_client.Job] = {}
-        self.uc_conn = unicore_hooks.UnicoreHook().get_conn()
         # TODO get job description generator class and init params from config
         self.job_descr_generator: JobDescriptionGenerator = NaiveJobDescriptionGenerator()
 
@@ -67,25 +72,25 @@ class UnicoreExecutor(BaseExecutor):
         return []
 
     def _get_unicore_client(self, executor_config: dict | None = {}):
-
-        return self.uc_conn
-        # END TODO fix this
-        # include client desires from executor_config
-        unicore_conn_id = executor_config.get(  # type: ignore
-            UnicoreExecutor.EXECUTOR_CONFIG_UNICORE_CONN_KEY,
-            conf.get("unicore.executor", "UNICORE_CONN_ID"),
-        )  # task can provide a different unicore connection to use, else airflow-wide default is used
-        self.log.info(f"Using base unicore connection with id '{unicore_conn_id}'")
-        hook = unicore_hooks.UnicoreHook(uc_conn_id=unicore_conn_id)
-        unicore_site = executor_config.get(  # type: ignore
+        overwrite_unicore_site = executor_config.get(  # type: ignore
             UnicoreExecutor.EXECUTOR_CONFIG_UNICORE_SITE_KEY, None
         )  # task can provide a different site to run at, else default from connection is used
-
+        overwrite_unicore_credential = executor_config.get(  # type: ignore
             UnicoreExecutor.EXECUTOR_CONFIG_UNICORE_CREDENTIAL_KEY, None
         )  # task can provide a different credential to use, else default from connection is used
-
-
+        token = conf.get("unicore.executor", "AUTH_TOKEN", fallback="")
+        base_url = conf.get(
+            "unicore.executor", "DEFAULT_URL", fallback="http://localhost:8080/DEMO-SITE/rest/core"
         )
+        credential = create_credential(token=token)
+        if overwrite_unicore_site is not None:
+            base_url = overwrite_unicore_site
+        if overwrite_unicore_credential is not None:
+            credential = overwrite_unicore_credential
+        if not base_url:
+            raise TypeError()
+        conn = client.Client(credential, base_url)
+        return conn
 
     def _submit_job(self, workload: ExecuteTask):
         uc_client = self._get_unicore_client(executor_config=workload.ti.executor_config)
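A task can redirect the executor to a different site or credential through its executor_config, using the two keys introduced above. A hedged sketch; the site URL, token and operator are placeholders, only the create_credential(token=...) call is taken from the diff itself:

    from pyunicore.credentials import create_credential

    from airflow_unicore_integration.executors.unicore_executor import UnicoreExecutor

    # executor_config overriding the configured DEFAULT_URL / AUTH_TOKEN for one task
    other_site = {
        UnicoreExecutor.EXECUTOR_CONFIG_UNICORE_SITE_KEY: "https://other-hpc.example.org:8080/SITE/rest/core",
        UnicoreExecutor.EXECUTOR_CONFIG_UNICORE_CREDENTIAL_KEY: create_credential(token="<other-token>"),
    }
    # e.g. SomeOperator(task_id="submit_elsewhere", executor_config=other_site)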
{airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/hooks/unicore_hooks.py

@@ -1,8 +1,14 @@
 from __future__ import annotations
 
-
+import logging
+from typing import Any
+
+from airflow.providers.common.compat.sdk import BaseHook
 from pyunicore import client
 from pyunicore import credentials
+from wtforms import StringField
+
+logger = logging.getLogger(__name__)
 
 
 class UnicoreHook(BaseHook):
@@ -23,25 +29,59 @@ class UnicoreHook(BaseHook):
         super().__init__()
         self.uc_conn_id = uc_conn_id
 
+    @classmethod
+    def get_connection_form_fields(cls):
+        return {"auth_token": StringField("Auth Token")}
+
+    @classmethod
+    def get_ui_field_behaviour(cls) -> dict[str, Any]:
+        """Return custom UI field behaviour for UNICORE connection."""
+        return {
+            "hidden_fields": ["schema", "port", "extra"],
+            "relabeling": {
+                "login": "Username",
+            },
+            "placeholder": {"auth_token": "UNICORE auth token"},
+        }
+
     def get_conn(
         self,
         overwrite_base_url: str | None = None,
        overwrite_credential: credentials.Credential | None = None,
     ) -> client.Client:
         """Return a Unicore Client. base_url and credentials may be overwritten."""
-
+        logger.debug(
             f"Getting connection with id '{self.uc_conn_id}' from secrets backend. Will be modified with user input for UNICORE."
         )
         params = self.get_connection(self.uc_conn_id)
         base_url = params.host
         credential = credentials.UsernamePassword(params.login, params.password)
+        auth_token = params.extra_dejson.get("auth_token", None)
+        if auth_token is not None:
+            credential = credentials.create_credential(token=auth_token)
         if overwrite_base_url is not None:
             base_url = overwrite_base_url
         if overwrite_credential is not None:
             credential = overwrite_credential
+        if not base_url:
+            raise TypeError()
         conn = client.Client(credential, base_url)
         return conn
 
+    def get_credential(self) -> credentials.Credential:
+        """Return the credential part of the connection as a Credential object."""
+        params = self.get_connection(self.uc_conn_id)
+        credential = credentials.UsernamePassword(params.login, params.password)
+        auth_token = params.extra_dejson.get("auth_token", None)
+        if auth_token is not None:
+            credential = credentials.create_credential(token=auth_token)
+        return credential
+
+    def get_base_url(self) -> str:
+        """Return the base url of the connection."""
+        params = self.get_connection(self.uc_conn_id)
+        return params.host
+
     def test_connection(self) -> tuple[bool, str]:
         """Test the connection by sending an access_info request"""
         conn = self.get_conn()
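With the new auth_token extra field, a UNICORE connection can be provided entirely through the environment and then consumed by the hook. A minimal sketch, assuming a connection id of "unicore_default"; the id and all values are illustrative:

    import json
    import os

    # register the connection before the hook looks it up in the secrets backend
    os.environ["AIRFLOW_CONN_UNICORE_DEFAULT"] = json.dumps(
        {
            "conn_type": "generic",
            "host": "https://hpc.example.org:8080/SITE/rest/core",
            "login": "demouser",
            "password": "ignored-when-auth_token-is-set",
            "extra": json.dumps({"auth_token": "<unicore-auth-token>"}),
        }
    )

    from airflow_unicore_integration.hooks import unicore_hooks

    hook = unicore_hooks.UnicoreHook(uc_conn_id="unicore_default")
    uc = hook.get_conn()           # pyunicore client built from host + token credential
    print(hook.get_base_url())     # -> the host configured above
    print(hook.test_connection())  # (True, ...) if the site answers the access_info request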
airflow_unicore_integration-0.2.0/src/airflow_unicore_integration/operators/container.py ADDED

@@ -0,0 +1,22 @@
+from typing import Dict
+from typing import List
+
+from airflow_unicore_integration.operators.unicore_operators import (
+    UnicoreGenericOperator,
+)
+
+
+class UnicoreContainerOperator(UnicoreGenericOperator):
+    def __init__(
+        self, name: str, docker_image_url: str, command: str, options: str | None = None, **kwargs
+    ):
+        params: Dict[str, str | List[str]] = {"COMMAND": command, "IMAGE_URL": docker_image_url}
+        if options is not None:
+            params["OPTIONS"] = options
+        super().__init__(
+            name=name,
+            application_name="CONTAINER",
+            application_version="1.0",
+            parameters=params,
+            **kwargs,
+        )
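An illustrative use of the new operator inside a DAG; the dag id, task id, image, command and options are invented for the example, while the mapping onto the UNICORE CONTAINER application comes from the class above:

    from airflow import DAG

    from airflow_unicore_integration.operators.container import UnicoreContainerOperator

    with DAG(dag_id="unicore_container_demo") as dag:
        hello = UnicoreContainerOperator(
            task_id="hello_container",             # forwarded via **kwargs to the base operator
            name="hello-container-job",            # UNICORE job name
            docker_image_url="docker://alpine:3",  # becomes the IMAGE_URL parameter
            command="echo hello from unicore",     # becomes the COMMAND parameter
            options="--no-home",                   # optional, becomes the OPTIONS parameter
        )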
{airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/operators/unicore_operators.py

@@ -6,7 +6,7 @@ from typing import Sequence
 
 import pyunicore.client as uc_client
 import pyunicore.credentials as uc_credentials
-from airflow.
+from airflow.sdk.bases.operator import BaseOperator
 from airflow.utils.context import Context
 
 from airflow_unicore_integration.hooks import unicore_hooks
@@ -62,6 +62,7 @@ class UnicoreGenericOperator(BaseOperator):
         credential_username: str | None = None,
         credential_password: str | None = None,
         credential_token: str | None = None,
+        conn_id: str | None = None,
         **kwargs,
     ):
         """
@@ -114,6 +115,7 @@ class UnicoreGenericOperator(BaseOperator):
         self.credential_username = credential_username
         self.credential_password = credential_password
         self.credential_token = credential_token
+        self.conn_id = conn_id
 
         self.validate_job_description()
         logger.debug("created Unicore Job Task")
@@ -262,7 +264,7 @@ class UnicoreGenericOperator(BaseOperator):
 
     def execute_async(self, context: Context) -> Any:
         """Submits the job and returns the job object without waiting for it to finish."""
-        client = self.get_uc_client()
+        client = self.get_uc_client(self.conn_id)
         job = client.new_job(job_description=self.get_job_description(), inputs=[])
         return job
 
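The new conn_id argument lets a single task pick a specific Airflow connection instead of the hook default. A hedged sketch; the application name, version and connection id are placeholders, and only arguments visible in this diff are used:

    from airflow_unicore_integration.operators.unicore_operators import UnicoreGenericOperator

    show_date = UnicoreGenericOperator(
        task_id="show_date",
        name="show-date-job",
        application_name="DATE",      # assumes a DATE application is registered at the site
        application_version="1.0",
        conn_id="unicore_hpc_site",   # new in 0.2.0, forwarded to get_uc_client()
    )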
{airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration/util/job.py

@@ -1,10 +1,16 @@
-import
+import json
+import logging
 from typing import Any
 from typing import Dict
 
 from airflow.configuration import conf
 from airflow.executors.workloads import ExecuteTask
 from airflow.models.taskinstancekey import TaskInstanceKey
+from airflow.providers.git.hooks.git import GitHook
+
+from .launch_script_content import LAUNCH_SCRIPT_CONTENT_STR
+
+logger = logging.getLogger(__name__)
 
 
 class JobDescriptionGenerator:
@@ -18,6 +24,8 @@ class JobDescriptionGenerator:
     EXECUTOR_CONFIG_PARAMETERS = "Parameters"  # gets added to the unicore job description
     EXECUTOR_CONFIG_PROJECT = "Project"  # gets added to the unicore job description
     EXECUTOR_CONFIG_PRE_COMMANDS = "precommands"  # gets added to the unicore job description
+    EXECUTOR_CONFIG_POST_COMMANDS = "postcommands"  # gets added to the unicore job description
+    EXECUTOR_CONFIG_JOB_TYPE = "job_type"
     EXECUTOR_CONFIG_UNICORE_CONN_KEY = (
         "unicore_connection_id"  # alternative connection id for the Unicore connection to use
     )
@@ -27,12 +35,17 @@ class JobDescriptionGenerator:
     def create_job_description(self, workload: ExecuteTask) -> Dict[str, Any]:
         raise NotImplementedError()
 
+    def get_job_name(self, key: TaskInstanceKey) -> str:
+        return f"{key.dag_id} - {key.task_id} - {key.run_id} - {key.try_number}"
+
 
 class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
     """
-    This class generates a naive unicore job, that expects there to be a working python env
+    This class generates a naive unicore job, that expects there to be a working python env containing airflow and any other required dependencies on the executing system.
     """
 
+    GIT_DAG_BUNDLE_CLASSPATH = "airflow.providers.git.bundles.git.GitDagBundle"
+
     def create_job_description(self, workload: ExecuteTask) -> Dict[str, Any]:
         key: TaskInstanceKey = workload.ti.key
         executor_config = workload.ti.executor_config
@@ -46,6 +59,8 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
         user_added_resources: Dict[str, str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_RESOURCES, None)  # type: ignore
         user_added_pre_commands: list[str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_PRE_COMMANDS, [])  # type: ignore
         user_defined_python_env: str = workload.ti.executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_PYTHON_ENV_KEY, None)  # type: ignore
+        user_added_post_commands: list[str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_POST_COMMANDS, [])  # type: ignore
+        user_defined_job_type: str = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_JOB_TYPE, None)  # type: ignore
         # get local dag path from cmd and fix dag path in arguments
         dag_rel_path = str(workload.dag_rel_path)
         if dag_rel_path.startswith("DAG_FOLDER"):
@@ -56,56 +71,115 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
         server = conf.get(
             "unicore.executor", "execution_api_server_url", fallback=default_execution_api_server
         )
+        logger.debug(f"Server is {server}")
+
+        # set job type
+        if user_defined_job_type:
+            job_descr_dict["Job type"] = user_defined_job_type
 
         # check which python virtualenv to use
         if user_defined_python_env:
             python_env = user_defined_python_env
         else:
             python_env = conf.get("unicore.executor", "DEFAULT_ENV")
+        tmp_dir = conf.get("unicore.executor", "TMP_DIR", "/tmp")
         # prepare dag file to be uploaded via unicore
         # dag_file = open("/tmp/test")
         # dag_content = dag_file.readlines()
         # dag_import = {"To": dag_rel_path, "Data": dag_content}
         worker_script_import = {
             "To": "run_task_via_supervisor.py",
-            "From": "https://gist.githubusercontent.com/cboettcher/3f1101a1d1b67e7944d17c02ecd69930/raw/1d90bf38199d8c0adf47a79c8840c3e3ddf57462/run_task_via_supervisor.py",
+            # "From": "https://gist.githubusercontent.com/cboettcher/3f1101a1d1b67e7944d17c02ecd69930/raw/1d90bf38199d8c0adf47a79c8840c3e3ddf57462/run_task_via_supervisor.py",
+            "Data": LAUNCH_SCRIPT_CONTENT_STR,
         }
         # start filling the actual job description
-        job_descr_dict["Name"] =
+        job_descr_dict["Name"] = self.get_job_name(key)
         job_descr_dict["Executable"] = (
-            "python"  # TODO may require module load to be setup for some systems
+            f". airflow_config.env && . {python_env} && python run_task_via_supervisor.py --json-string '{workload.model_dump_json()}'"  # TODO may require module load to be setup for some systems
         )
-        job_descr_dict["Arguments"] = [
-
-
-
+        # job_descr_dict["Arguments"] = [
+        #     "-c",
+        #     "source airflow_config.env",
+        #     "source {python_env}/bin/activate",
+        #     "python",
+        #     "run_task_via_supervisor.py",
+        #     f"--json-string '{workload.model_dump_json()}'",
+        # ]
+
         job_descr_dict["Environment"] = {
             "AIRFLOW__CORE__EXECUTION_API_SERVER_URL": server,
-            "AIRFLOW__CORE__DAGS_FOLDER": "./",
+            # "AIRFLOW__CORE__DAGS_FOLDER": "./",
             "AIRFLOW__LOGGING__LOGGING_LEVEL": "DEBUG",
             "AIRFLOW__CORE__EXECUTOR": "LocalExecutor,airflow_unicore_integration.executors.unicore_executor.UnicoreExecutor",
         }
 
         # build filecontent string for importing in the job | this is needed to avoid confusing nested quotes and trying to escape them properly when using unicore env vars directly
-        env_file_content: list[str] = [
-
-
+        env_file_content: list[str] = []
+
+        # transmit needed dag bundle information (and possibly files) to job directory
+        bundle_str = conf.get("dag.processor", "dag_bundle_config_list")
+        logger.debug(f"Dag Bundle config is: {bundle_str}")
+        bundle_dict = json.loads(bundle_str)
+        conn_id_to_transmit = None
+        bundle_type = None
 
-
-
-
-
+        for bundle in bundle_dict:
+            if bundle["name"] == workload.bundle_info.name:
+                if bundle["classpath"] == NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH:
+                    bundle_type = NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH
+                    env_file_content.append(
+                        f"export AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST='[{json.dumps(bundle)}]'"
+                    )
+                    conn_id_to_transmit = bundle["kwargs"]["git_conn_id"]
+                    break
+            # TODO handle other bundle types
+
+        if bundle_type:
+            if (
+                bundle_type == NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH
+                and conn_id_to_transmit
+            ):
+                git_hook = GitHook(conn_id_to_transmit)
+                git_remote_url = git_hook.repo_url
+                git_dir_prefix = f"{tmp_dir}/{workload.ti.dag_id}/{workload.ti.task_id}/{workload.ti.run_id}/{workload.ti.try_number}"
+                git_local_url = f"{git_dir_prefix}/dagmirror"
+                dag_bundle_path = f"{git_dir_prefix}/dagbundle"
+                # add precommand to clone repo on login node
+                git_precommand = f". {python_env} && mkdir -p {git_local_url} && mkdir -p {dag_bundle_path} && git clone {git_remote_url} {git_local_url}"
+                job_descr_dict["Environment"][
+                    "AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_STORAGE_PATH"
+                ] = f"{dag_bundle_path}"
+                logger.info(f"git precommand is {git_precommand}")
+                user_added_pre_commands.append(git_precommand)
+                # add connection to local clone to env of job
+                airflow_conn_string = json.dumps(
+                    {"conn_type": "git", "host": f"file://{git_local_url}"}
+                )
+                env_file_content.append(
+                    f"export AIRFLOW_CONN_{str(conn_id_to_transmit).upper()}='{airflow_conn_string}'"
+                )
+                logger.info(f"connection is '{airflow_conn_string}'")
+                # add cleanup of local git repo to job description
+                git_cleanup_command = f"rm -r {git_dir_prefix}"
+                logger.info(f"git cleanup is {git_cleanup_command}")
+                user_added_post_commands.append(git_cleanup_command)
 
         airflow_env_import = {"To": "airflow_config.env", "Data": env_file_content}
 
-
-
-        )
-
+        job_descr_dict["Imports"] = [worker_script_import, airflow_env_import]
+
+        if len(user_added_pre_commands) > 0:
+            precommand_import = {"To": "precommand.sh", "Data": user_added_pre_commands}
+            job_descr_dict["Imports"].append(precommand_import)
+            job_descr_dict["User precommand"] = "bash precommand.sh"
+        if len(user_added_post_commands) > 0:
+            postcommand_import = {"To": "postcommand.sh", "Data": user_added_post_commands}
+            job_descr_dict["Imports"].append(postcommand_import)
+            job_descr_dict["User postcommand"] = "bash postcommand.sh"
+
         job_descr_dict["RunUserPrecommandOnLoginNode"] = (
-            "
+            "true"  # precommand needs public internet access to clone dag repos
         )
-        job_descr_dict["Imports"] = [worker_script_import, airflow_env_import]
         # add user defined options to description
         if user_added_env:
             job_descr_dict["Environment"].update(user_added_env)
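Putting the executor_config keys together, a task can shape the generated job description roughly as below. The key names are the class attributes from job.py, so no literal strings are guessed; all values are invented for illustration:

    from airflow_unicore_integration.util.job import JobDescriptionGenerator as JDG

    unicore_job_tuning = {
        JDG.EXECUTOR_CONFIG_PROJECT: "demo-project",               # UNICORE "Project" entry
        JDG.EXECUTOR_CONFIG_JOB_TYPE: "interactive",               # assumed value for "Job type"
        JDG.EXECUTOR_CONFIG_PRE_COMMANDS: ["echo preparing job"],  # appended to precommand.sh
        JDG.EXECUTOR_CONFIG_POST_COMMANDS: ["echo cleaning up"],   # appended to postcommand.sh
    }
    # attach to any task, e.g. SomeOperator(task_id="t1", executor_config=unicore_job_tuning)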
airflow_unicore_integration-0.2.0/src/airflow_unicore_integration/util/launch_script_content.py ADDED

@@ -0,0 +1,90 @@
+LAUNCH_SCRIPT_CONTENT_STR = '''# get some debug info before anything can fail
+import sys
+print(sys.executable)
+
+from airflow.executors import workloads
+from airflow.sdk.execution_time.supervisor import supervise
+from airflow.configuration import conf
+from pydantic import TypeAdapter
+import argparse
+import structlog
+import sys
+log = structlog.get_logger(logger_name=__name__)
+
+"""
+Usage:
+
+python run_task_via_supervisor.py [--json-string <workload string> | --json-path <workload filepath>]
+
+"""
+
+
+def execute_workload_locally(workload: workloads.All):
+    if not isinstance(workload, workloads.ExecuteTask):
+        raise ValueError(f"Executor does not know how to handle {type(workload)}")
+
+    base_url = conf.get("api", "base_url", fallback="/")
+    default_execution_api_server = f"{base_url.rstrip('/')}/execution/"
+    server = conf.get("core", "execution_api_server_url", fallback=default_execution_api_server)
+    log.info(f"Connecting to server: {server}")
+
+    log.debug(f"Workload is: {workload}")
+    log.debug(f"Dag Bundle is: {workload.bundle_info}")
+
+    supervise(
+        # This is the "wrong" ti type, but it duck types the same. TODO: Create a protocol for this.
+        ti=workload.ti,  # type: ignore[arg-type]
+        dag_rel_path=workload.dag_rel_path,
+        bundle_info=workload.bundle_info,
+        token=workload.token,
+        server=server,
+        log_path=workload.log_path,
+        # Include the output of the task to stdout too, so that in process logs can be read via
+        # unicore as job logs.
+        subprocess_logs_to_stdout=True,
+    )
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Execute a workload in a Containerised executor using the task SDK."
+    )
+
+    # Create a mutually exclusive group to ensure that only one of the flags is set
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument(
+        "--json-path",
+        help="Path to the input JSON file containing the execution workload payload.",
+        type=str,
+    )
+    group.add_argument(
+        "--json-string",
+        help="The JSON string itself containing the execution workload payload.",
+        type=str,
+    )
+    args = parser.parse_args()
+
+    decoder = TypeAdapter[workloads.All](workloads.All)
+
+    if args.json_path:
+        try:
+            with open(args.json_path) as file:
+                input_data = file.read()
+            workload = decoder.validate_json(input_data)
+        except Exception as e:  # noqa: B902
+            log.error("Failed to read file", error=str(e))
+            sys.exit(1)
+
+    elif args.json_string:
+        try:
+            workload = decoder.validate_json(args.json_string)
+        except Exception as e:
+            log.error("Failed to parse input JSON string", error=str(e))
+            sys.exit(1)
+
+    execute_workload_locally(workload)
+
+
+if __name__ == "__main__":
+    main()
+'''
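For local debugging, the embedded launcher can be materialised and invoked the same way a generated job does. A sketch under the assumption that a serialized ExecuteTask workload has been saved to workload.json (a placeholder path) and that the task SDK is installed in the current environment:

    import subprocess
    from pathlib import Path

    from airflow_unicore_integration.util.launch_script_content import LAUNCH_SCRIPT_CONTENT_STR

    # write the script exactly as the job import would
    Path("run_task_via_supervisor.py").write_text(LAUNCH_SCRIPT_CONTENT_STR)

    # hand it a previously dumped workload
    subprocess.run(
        ["python", "run_task_via_supervisor.py", "--json-path", "workload.json"],
        check=True,
    )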
{airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0/src/airflow_unicore_integration.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: airflow-unicore-integration
-Version: 0.1.5
+Version: 0.2.0
 Summary: Running Unicore Jobs from airflow DAGs.
 Author-email: Christian Böttcher <c.boettcher@fz-juelich.de>
 License-Expression: BSD-3-Clause
@@ -24,7 +24,7 @@ Unicore Airflow Integration
 
 |Generic badge|
 
-.. |Generic badge| image:: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml/badge.svg
+.. |Generic badge| image:: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml/badge.svg
    :target: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml
 
 This project integrates `UNICORE <https://github.com/UNICORE-EU>`_ and `Apache Airflow <https://airflow.apache.org/>`_.
@@ -34,6 +34,31 @@ Airflow is a platform to programmatically author, schedule and monitor workflows
 In the current state, this project provides a set of airflow `operators <https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/operators.html>`_, which can be used as part of airflow workflows to submit jobs to Unicore.
 The UnicoreExecutor only offers experimental support for airflow 3 so far. Further support is currently being worked on.
 
+-------------------------
+Using the UnicoreExecutor
+-------------------------
+
+To use the UnicoreExecutor, this library needs to be installed in your airflow environment and then some configuration work needs to be done.
+
+How to configure these settings is up to your deployment, as it uses the standard airflow configuration mechanism.
+In case of a helm deployment via the official helm chart, you will need to use environment variables, as the unicore-related options are not present in the chart and would cause schema validation to fail.
+
+All options fall under the [unicore.executor] section in airflow.cfg, or use the ``AIRFLOW__UNICORE_EXECUTOR__`` prefix when set as environment variables.
+
+========================= ============================================ ===========================================================================================
+Option name               default                                      description
+========================= ============================================ ===========================================================================================
+EXECUTION_API_SERVER_URL  <The default from the airflow config>        The url to reach the airflow API server from the execution environment (e.g. compute nodes)
+AUTH_TOKEN                mandatory                                    The unicore auth token to use for job submission
+DEFAULT_URL               http://localhost:8080/DEMO-SITE/rest/core    The default unicore site to submit jobs to
+DEFAULT_ENV               mandatory                                    The default activation script for a functional airflow environment on the execution machine
+TMP_DIR                   /tmp                                         A temporary directory to store data such as GitDagBundles
+========================= ============================================ ===========================================================================================
+
+The default env is loaded via ``. default_env.sh`` and must enable an environment in which python is available in a suitable version and the ``apache-airflow-task-sdk`` and ``apache-airflow-providers-git`` packages are installed. All other dependencies depend on the dags to be run, but must already be included in the environment.
+
+A simple solution for this may be the "activate" script of a python venv. If the target system requires additional commands to enable python (e.g. ``module load``), these may be added to the top of the activate script.
+
 ---------------------------
 Using the Unicore Operators
 ---------------------------
{airflow_unicore_integration-0.1.5 → airflow_unicore_integration-0.2.0}/src/airflow_unicore_integration.egg-info/SOURCES.txt

@@ -14,6 +14,8 @@ src/airflow_unicore_integration/executors/unicore_executor.py
 src/airflow_unicore_integration/hooks/__init__.py
 src/airflow_unicore_integration/hooks/unicore_hooks.py
 src/airflow_unicore_integration/operators/__init__.py
+src/airflow_unicore_integration/operators/container.py
 src/airflow_unicore_integration/operators/unicore_operators.py
 src/airflow_unicore_integration/policies/__init__.py
-src/airflow_unicore_integration/util/job.py
+src/airflow_unicore_integration/util/job.py
+src/airflow_unicore_integration/util/launch_script_content.py