airflow-unicore-integration 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow_unicore_integration-0.0.4/PKG-INFO +16 -0
- airflow_unicore_integration-0.0.4/README.rst +138 -0
- airflow_unicore_integration-0.0.4/pyproject.toml +36 -0
- airflow_unicore_integration-0.0.4/setup.cfg +4 -0
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration/__init__.py +10 -0
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration/hooks/__init__.py +0 -0
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration/hooks/unicore_hooks.py +49 -0
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration/operators/__init__.py +0 -0
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration/operators/unicore_operators.py +324 -0
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration.egg-info/PKG-INFO +16 -0
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration.egg-info/SOURCES.txt +13 -0
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration.egg-info/dependency_links.txt +1 -0
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration.egg-info/entry_points.txt +2 -0
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration.egg-info/requires.txt +2 -0
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: airflow-unicore-integration
|
|
3
|
+
Version: 0.0.4
|
|
4
|
+
Summary: Running Unicore Jobs from airflow DAGs.
|
|
5
|
+
Author-email: Christian Böttcher <c.boettcher@fz-juelich.de>
|
|
6
|
+
Project-URL: Homepage, https://gitlab.jsc.fz-juelich.de/boettcher1/airflow_unicore_integration
|
|
7
|
+
Project-URL: Issues, https://gitlab.jsc.fz-juelich.de/boettcher1/airflow_unicore_integration/-/issues
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Framework :: Apache Airflow :: Provider
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: pyunicore>=1.0.0
|
|
16
|
+
Requires-Dist: apache-airflow==2.8.4
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
===========================
|
|
2
|
+
Unicore Airflow Integration
|
|
3
|
+
===========================
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
---------------------------
|
|
7
|
+
Using the Unicore Operators
|
|
8
|
+
---------------------------
|
|
9
|
+
|
|
10
|
+
There are multiple Unicore operators provided by this package. The most versatile one is the ``UnicoreGenericOperator``, which supports a lot of job parameters.
|
|
11
|
+
All other operators are intended to offer a slightly less complex constructor, and therefore simpler usage, but all generic parameters are still available to be used.
|
|
12
|
+
|
|
13
|
+
All operators support all possible parameters of the `Unicore job description <https://unicore-docs.readthedocs.io/en/latest/user-docs/rest-api/job-description/index.html#overview>`_. Here is an excerpt containing some commonly used parameters:
|
|
14
|
+
|
|
15
|
+
======================= ======================= =========================================== ====================
|
|
16
|
+
parameter name type default description
|
|
17
|
+
======================= ======================= =========================================== ====================
|
|
18
|
+
application_name str None Application Name
|
|
19
|
+
application_version str None Application Version
|
|
20
|
+
executable str None Command line executable
|
|
21
|
+
arguments List(str) None Command line arguments
|
|
22
|
+
environment Map(str,str) None environment arguments
|
|
23
|
+
parameters Map None Application Parameters
|
|
24
|
+
project str None Accounting Project
|
|
25
|
+
imports List(imports) None Stage-in/data import - see Unicore docs
|
|
26
|
+
exports List(exports) None Stage-out/data export - see Unicore docs
|
|
27
|
+
======================= ======================= =========================================== ====================
|
|
28
|
+
|
|
29
|
+
For imports and exports go `here <https://unicore-docs.readthedocs.io/en/latest/user-docs/rest-api/job-description/index.html#importing-files-into-the-job-workspace>`_ for details.
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
The ``UnicoreGenericOperator`` supports the following additional parameters:
|
|
33
|
+
|
|
34
|
+
======================= ======================= =========================================== ====================
|
|
35
|
+
parameter name type default description
|
|
36
|
+
======================= ======================= =========================================== ====================
|
|
37
|
+
name str None name for the airflow task and the Unicore job
|
|
38
|
+
xcom_output_files List(str) ["stdout","stderr"] list of files of which the content should be put into xcoms
|
|
39
|
+
base_url str configured in airflow connections or None The base URL of the UNICOREX server to be used for the Unicore client
|
|
40
|
+
credential pyunicore credential configured in airflow connections or None A Unicore Credential to be used for the Unicore client
|
|
41
|
+
credential_username str configured in airflow connections or None Username for the Unicore client credentials
|
|
42
|
+
credential_password     str                     configured in airflow connections or None   Password for the Unicore client credentials
|
|
43
|
+
credential_token str configured in airflow connections or None An OIDC token to be used by the Unicore client
|
|
44
|
+
======================= ======================= =========================================== ====================
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
The ``UnicoreScriptOperator`` offers a way to more easily submit a script as a job, where the script content can be provided as a string.
|
|
48
|
+
|
|
49
|
+
======================= ======================= =========================================== ====================
|
|
50
|
+
parameter name type default description
|
|
51
|
+
======================= ======================= =========================================== ====================
|
|
52
|
+
script_content str None The content of the script file
|
|
53
|
+
======================= ======================= =========================================== ====================
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
The ``UnicoreBSSOperator`` offers a way to directly submit batch-scripts from their content-strings.
|
|
57
|
+
|
|
58
|
+
======================= ======================= =========================================== ====================
|
|
59
|
+
parameter name type default description
|
|
60
|
+
======================= ======================= =========================================== ====================
|
|
61
|
+
bss_file_content str None The content of the batch script file
|
|
62
|
+
======================= ======================= =========================================== ====================
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
The ``UnicoreExecutableOperator`` offers a reduced constructor that only requires an executable.
|
|
66
|
+
|
|
67
|
+
======================= ======================= =========================================== ====================
|
|
68
|
+
parameter name type default description
|
|
69
|
+
======================= ======================= =========================================== ====================
|
|
70
|
+
executable str None The executable to run for this job
|
|
71
|
+
xcom_output_files List(str) ["stdout","stderr"] list of files of which the content should be put into xcoms
|
|
72
|
+
======================= ======================= =========================================== ====================
|
|
73
|
+
|
|
74
|
+
The ``UnicoreDateOperator`` is more of a testing operator, since it will only run the ``date`` executable.
|
|
75
|
+
|
|
76
|
+
-------------------------------
|
|
77
|
+
Behaviour on Errors and Success
|
|
78
|
+
-------------------------------
|
|
79
|
+
|
|
80
|
+
The Unicore Operators do not do a lot of error and exception handling, and mostly just forward any problems to be handled by airflow.
|
|
81
|
+
All of the Unicore logic is handled by the `pyunicore library <https://github.com/HumanBrainProject/pyunicore>`_.
|
|
82
|
+
|
|
83
|
+
While some validation of the resulting Unicore job description is done automatically, it may still be possible to build an invalid job description with the operators.
|
|
84
|
+
This may lead to a submission failure with Unicore. In this case, an exception is thrown to be handled by airflow.
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
For a successful job submission, the job exit code is returned as the task return value, so that airflow can handle non-zero exit codes.
|
|
88
|
+
All operators will also append the content of the job-log-file from Unicore to the airflow task log.
|
|
89
|
+
Also, some job results and values will be uploaded via airflow-x-coms as well:
|
|
90
|
+
|
|
91
|
+
======================= ========================================
|
|
92
|
+
xcom name description
|
|
93
|
+
======================= ========================================
|
|
94
|
+
Unicore Job ID the Unicore ID for the job
|
|
95
|
+
Unicore Job the TSI script that was submitted by Unicore
|
|
96
|
+
BSS_SUBMIT the bss_script submitted by Unicore
|
|
97
|
+
status_message the status message for the Unicore job
|
|
98
|
+
log the Unicore job log
|
|
99
|
+
workdir_content content of the job workdir upon completion
|
|
100
|
+
[xcom_output_files] content of each file in their own xcom, by default stdout and stderr
|
|
101
|
+
======================= ========================================
|
|
102
|
+
|
|
103
|
+
------------
|
|
104
|
+
Example DAGs
|
|
105
|
+
------------
|
|
106
|
+
|
|
107
|
+
There are some example DAGs in this repository under ``project-dir/dags``.
|
|
108
|
+
|
|
109
|
+
- ``unicore-test-1.py`` just shows basic date and executable usage.
|
|
110
|
+
- ``unicore-test-2.py`` has some basic examples for the generic operator.
|
|
111
|
+
- ``unicore-test-3.py`` also includes script-operator examples.
|
|
112
|
+
- ``unicore-test-4.py`` has some examples with more arguments.
|
|
113
|
+
- ``unicore-test-bss.py`` shows how bss submission can be done (very simple example).
|
|
114
|
+
- ``unicore-test-credentials.py`` demonstrates that not only the credentials from the airflow connections backend can be used, but they can also be provided in the constructor of the operator.
|
|
115
|
+
- ``unicore-test-import-export.py`` gives short examples for the imports and exports usage.
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
-----------------
|
|
119
|
+
Setup testing env
|
|
120
|
+
-----------------
|
|
121
|
+
|
|
122
|
+
Ensure a current version of docker is installed.
|
|
123
|
+
|
|
124
|
+
Run ``python3 -m build`` to build the python package.
|
|
125
|
+
|
|
126
|
+
Run the ``testing-env/build-image.sh`` script to create the customized airflow image, which will contain the newly built python package.
|
|
127
|
+
|
|
128
|
+
Run ``testing-env/run-testing-env.sh init`` to initialize the airflow containers, database etc. This only needs to be done once.
|
|
129
|
+
|
|
130
|
+
Run ``testing-env/run-testing-env.sh up`` to start the local airflow and Unicore deployment. Airflow will be available on port 8080, Unicore on port 8081.
|
|
131
|
+
|
|
132
|
+
The ``run-testing-env.sh`` script supports the commands up, down, start, stop, ps and init for matching docker compose functions.
|
|
133
|
+
|
|
134
|
+
-----------------------
|
|
135
|
+
Install package via pip
|
|
136
|
+
-----------------------
|
|
137
|
+
|
|
138
|
+
``pip install airflow-unicore-integration --index-url https://gitlab.jsc.fz-juelich.de/api/v4/projects/6269/packages/pypi/simple``
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = [
|
|
3
|
+
"setuptools>=61.0"
|
|
4
|
+
]
|
|
5
|
+
build-backend = "setuptools.build_meta"
|
|
6
|
+
|
|
7
|
+
[project]
|
|
8
|
+
name = "airflow-unicore-integration"
|
|
9
|
+
version = "0.0.4"
|
|
10
|
+
authors = [
|
|
11
|
+
{ name="Christian Böttcher", email="c.boettcher@fz-juelich.de" },
|
|
12
|
+
]
|
|
13
|
+
description = "Running Unicore Jobs from airflow DAGs."
|
|
14
|
+
readme = {file = "README.rst", content-type = "text/x-rst"}
|
|
15
|
+
requires-python = ">=3.10"
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Framework :: Apache Airflow :: Provider",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Operating System :: OS Independent",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
dependencies = [
|
|
27
|
+
"pyunicore>=1.0.0",
|
|
28
|
+
"apache-airflow==2.8.4"
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://gitlab.jsc.fz-juelich.de/boettcher1/airflow_unicore_integration"
|
|
33
|
+
Issues = "https://gitlab.jsc.fz-juelich.de/boettcher1/airflow_unicore_integration/-/issues"
|
|
34
|
+
|
|
35
|
+
[project.entry-points."apache_airflow_provider"]
|
|
36
|
+
provider_info = "airflow_unicore_integration:get_provider_info"
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
def get_provider_info():
    """Entry point for the ``apache_airflow_provider`` hook.

    Returns the provider metadata dict that Airflow uses to discover this
    package's connection type and its backing hook class.
    """
    unicore_connection_type = {
        "connection-type": "unicore",
        "hook-class-name": "airflow_unicore_integration.hooks.unicore_hooks.UnicoreHook",
    }
    return {
        "package-name": "airflow-unicore-integration",
        "name": "Unicore",
        "description": "Apache Airflow Unicore provider containing Operators and hooks.",
        "connection-types": [unicore_connection_type],
    }
|
|
File without changes
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pyunicore import client, credentials
|
|
4
|
+
|
|
5
|
+
from airflow.hooks.base import BaseHook
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class UnicoreHook(BaseHook):
    """
    Interact with Unicore.

    Creates Unicore Clients from airflow connections.

    :param uc_conn_id: The unicore connection id - default: uc_default
    """

    conn_name_attr = "uc_conn_id"
    default_conn_name = "uc_default"
    conn_type = "unicore"
    hook_name = "Unicore"

    def __init__(self, uc_conn_id: str = default_conn_name) -> None:
        super().__init__()
        self.uc_conn_id = uc_conn_id
        # lazily created, cached pyunicore client (see get_conn)
        self.conn = None

    def get_conn(self, overwrite_base_url: str | None = None, overwrite_credential : credentials.Credential | None = None) -> client.Client:
        """Return a Unicore Client. base_url and credentials may be overwritten.

        :param overwrite_base_url: use this base URL instead of the host stored in the airflow connection
        :param overwrite_credential: use this credential instead of login/password from the airflow connection
        """
        # if not generated yet, or any overwrite attribute is set, create a new client
        if self.conn is None or overwrite_base_url is not None or overwrite_credential is not None:
            if overwrite_base_url is not None and overwrite_credential is not None:
                # both values are supplied by the caller, so the airflow connection
                # does not need to exist and must not be looked up (the lookup would
                # raise if no connection with this id is configured)
                base_url = overwrite_base_url
                credential = overwrite_credential
            else:
                params = self.get_connection(self.uc_conn_id)
                base_url = params.host
                credential = credentials.UsernamePassword(params.login, params.password)
                if overwrite_base_url is not None:
                    base_url = overwrite_base_url
                if overwrite_credential is not None:
                    credential = overwrite_credential
            self.conn = client.Client(credential, base_url)

        return self.conn


    def test_connection(self) -> tuple[bool, str]:
        """Test the connection by sending an access_info request"""
        try:
            conn = self.get_conn()
            conn.access_info()
            return True, "Connection successfully tested"
        except Exception as e:
            # broad catch is intentional: airflow's connection-test UI expects a
            # (success, message) tuple rather than an exception
            return False, str(e)
|
|
File without changes
|
airflow_unicore_integration-0.0.4/src/airflow_unicore_integration/operators/unicore_operators.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
from airflow.models.baseoperator import BaseOperator
|
|
2
|
+
from airflow.decorators.base import DecoratedOperator, task_decorator_factory
|
|
3
|
+
from typing import Any, List, Dict
|
|
4
|
+
|
|
5
|
+
from airflow.utils.context import Context
|
|
6
|
+
|
|
7
|
+
import pyunicore.client as uc_client
|
|
8
|
+
import pyunicore.credentials as uc_credentials
|
|
9
|
+
from airflow_unicore_integration.hooks import unicore_hooks
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)

# stage-in target name used by UnicoreScriptOperator for the uploaded script
DEFAULT_SCRIPT_NAME = 'default_script_from_job_description'
# stage-in target name used by UnicoreBSSOperator for the uploaded batch script
DEFAULT_BSS_FILE = 'default_bss_file_upload'

class JobDescriptionException(Exception):
    """ For errors in the unicore job description that would be generated by the unicore operators."""
    # NOTE: derives from Exception (not BaseException) so that generic
    # `except Exception` handlers - including airflow's task error handling -
    # can catch it; BaseException is reserved for exits/interrupts.
    def __init__(self, *args: object) -> None:
        super().__init__(*args)
|
|
22
|
+
|
|
23
|
+
class UnicoreGenericOperator(BaseOperator):
    """Submits a single Unicore job built from the given job-description parameters.

    The task waits for the job to finish; its return value is the Unicore job's
    exit code. Job id, logs, status message, workdir listing and selected output
    files are pushed via xcoms.
    """

    def __init__(self, name: str, application_name : str | None = None, application_version: str | None = None, executable: str | None = None, arguments: List[str] | None = None,
                 environment: List[str] | None = None, parameters: Dict[str,str | List[str]] | None = None, stdout: str | None = None, stderr: str | None = None, stdin: str | None = None, ignore_non_zero_exit_code: bool | None = None,
                 user_pre_command: str | None = None, run_user_pre_command_on_login_node: bool | None = None, user_pre_command_ignore_non_zero_exit_code: bool | None = None, user_post_command: str | None = None,
                 run_user_post_command_on_login_node: bool | None = None, user_post_command_ignore_non_zero_exit_code: bool | None = None, resources: Dict[str, str] | None = None, project: str | None = None,
                 imports: List[Dict[str,str | List[str]]] | None = None, exports: List[Dict[str,str | List[str]]] | None = None, have_client_stagein: bool | None = None, job_type: str | None = None,
                 login_node: str | None = None, bss_file: str | None = None, tags: List[str] | None = None, notification: str | None = None, user_email: str | None = None, xcom_output_files: List[str] | None = None,
                 base_url: str | None = None, credential: uc_credentials.Credential | None = None, credential_username: str | None = None, credential_password: str | None = None, credential_token: str | None = None, **kwargs):
        """
        Initialize a Unicore Job Operator.

        :param name: The name parameter defines both the airflow task name and the unicore job name.
        :param xcom_output_files: A list of filenames to be exported from the job directory via x_coms. By default stdout and stderr.
        :param base_url: The base URL of the UNICOREX server to be used for the unicore client. Overwrites the configured airflow connection.
        :param credential: A unicore Credential to be used for the unicore client. Overwrites the configured user-pass in the airflow connection.
        :param credential_username: Username for the unicore client credentials. Overwrites the configured user in the airflow connection.
        :param credential_password: Password for the unicore client credentials. Overwrites the configured password in the airflow connection.
        :param credential_token: An OIDC token to be used by the unicore client. Overwrites the configured credentials in the airflow connection.

        All other parameters are parameters for the Unicore Job Description as defined [here](https://unicore-docs.readthedocs.io/en/latest/user-docs/rest-api/job-description/index.html#overview).

        :raises JobDescriptionException: if neither an application nor an executable is given,
            or if a bss_file is given without an executable.
        """
        super().__init__(**kwargs)
        self.name = name
        self.application_name = application_name
        self.application_version = application_version
        self.executable = executable
        self.arguments = arguments
        self.environment = environment
        self.parameters = parameters
        self.stdout = stdout
        self.stderr = stderr
        self.stdin = stdin
        self.ignore_non_zero_exit_code = ignore_non_zero_exit_code
        self.user_pre_command = user_pre_command
        self.run_user_pre_command_on_login_node = run_user_pre_command_on_login_node
        self.user_pre_command_ignore_non_zero_exit_code = user_pre_command_ignore_non_zero_exit_code
        self.user_post_command = user_post_command
        self.run_user_post_command_on_login_node = run_user_post_command_on_login_node
        self.user_post_command_ignore_non_zero_exit_code = user_post_command_ignore_non_zero_exit_code
        self.resources = resources
        self.project = project
        self.imports = imports
        self.exports = exports
        self.have_client_stagein = have_client_stagein
        self.job_type = job_type
        self.login_node = login_node
        self.bss_file = bss_file
        self.tags = tags
        self.notification = notification
        self.user_email = user_email
        # build a fresh list per instance; a mutable default argument would be
        # shared between all operator instances and could be mutated globally
        self.xcom_output_files = ["stdout", "stderr"] if xcom_output_files is None else xcom_output_files

        self.base_url = base_url
        self.credential = credential
        self.credential_username = credential_username
        self.credential_password = credential_password
        self.credential_token = credential_token

        self.validate_job_description()
        logger.debug("created Unicore Job Task")

    def validate_job_description(self):
        """
        Does some basic validation and checks if a proper job description can be generated. Raises a JobDescriptionException if not.

        Also materializes ``credential_token`` or ``credential_username``/``credential_password``
        into a pyunicore credential object when no explicit credential was given.
        """
        # check for some errors in the parameters for creating the unicore job

        # first check if application or executable has been set
        if self.application_name is None and self.executable is None: # TODO check if executable is required if bss_file is given
            raise JobDescriptionException("Either application_name or executable must be set.")

        # if bss_file is set, we need an executable
        if self.bss_file is not None:
            if self.executable is None and self.application_name is not None:
                raise JobDescriptionException("A bss_file requires an executable to be set.")
            # TODO validate BSS file?
            logger.info("using bss file")

        if self.credential_token is not None and self.credential is None:
            logger.info("using provided oidc token")
            self.credential = uc_credentials.OIDCToken(token=self.credential_token)

        if self.credential_username is not None and self.credential_password is not None and self.credential is None:
            logger.info("using provied user/pass credentials")
            self.credential = uc_credentials.UsernamePassword(username=self.credential_username, password=self.credential_password)


    def get_job_description(self) -> dict[str,Any]:
        """Generates the job description to be submitted to the unicore server."""
        # (value, job-description key) pairs; only non-None values are emitted,
        # in the same order the unicore docs list them
        optional_entries = [
            (self.name, "Name"),
            (self.application_name, "ApplicationName"),
            (self.application_version, "ApplicationVersion"),
            (self.executable, "Executable"),
            (self.arguments, "Arguments"),
            (self.environment, "Environment"),
            (self.parameters, "Parameters"),
            (self.stdout, "Stdout"),
            (self.stderr, "Stderr"),
            (self.stdin, "Stdin"),
            (self.ignore_non_zero_exit_code, "IgnoreNonZeroExitCode"),
            (self.user_pre_command, "User precommand"),
            (self.run_user_pre_command_on_login_node, "RunUserPrecommandOnLoginNode"),
            (self.user_pre_command_ignore_non_zero_exit_code, "UserPrecommandIgnoreNonZeroExitCode"),
            (self.user_post_command, "User postcommand"),
            (self.run_user_post_command_on_login_node, "RunUserPostcommandOnLoginNode"),
            (self.user_post_command_ignore_non_zero_exit_code, "UserPostcommandIgnoreNonZeroExitCode"),
            (self.resources, "Resources"),
            (self.project, "Project"),
            (self.imports, "Imports"),
            (self.exports, "Exports"),
            (self.have_client_stagein, "haveClientStageIn"),
            (self.job_type, "Job type"),
            (self.login_node, "Login node"),
            (self.bss_file, "BSS file"),
            (self.notification, "Notification"),
            (self.user_email, "User email"),
            (self.tags, "Tags"),
        ]
        job_description_dict: Dict = {}
        for value, key in optional_entries:
            if value is not None:
                job_description_dict[key] = value
        return job_description_dict

    def get_uc_client(self, uc_conn_id: str | None = None) -> uc_client.Client:
        """Return a unicore client for submitting the job. Will use provided base_url and credentials if possible, else it will use the default unicore connection from airflow."""
        if self.base_url is not None and self.credential is not None:
            return uc_client.Client(self.credential, self.base_url)
        if uc_conn_id is None:
            hook = unicore_hooks.UnicoreHook()
        else:
            hook = unicore_hooks.UnicoreHook(uc_conn_id=uc_conn_id)
        return hook.get_conn(overwrite_base_url=self.base_url, overwrite_credential=self.credential)

    def execute_async(self, context: Context) -> Any:
        """Submits the job and returns the job object without waiting for it to finish."""
        client = self.get_uc_client()
        job = client.new_job(job_description=self.get_job_description(), inputs=[])
        return job

    def execute(self, context: Context) -> Any:
        """
        Submits the job description to the unicore server.

        After the job is finished (failed or successful), some basic output such as logs, status messages, job id or file content will be retrieved and returned via x_coms or airflow logs.
        The return value of this function is the exit code of the unicore job, to ensure proper handling of failed or aborted jobs.

        :raises AirflowFailException: if the unicore job did not finish successfully.
        """
        from airflow.exceptions import AirflowFailException
        from requests.exceptions import HTTPError

        job: uc_client.Job = self.execute_async(context) # TODO depending on params this may spawn multiple jobs -> in those cases, e.g. output needs to be handled differently
        logger.debug(f"Waiting for unicore job {job}")
        job.poll() # wait for job to finish

        task_instance = context['task_instance']

        task_instance.xcom_push(key="status_message", value=job.properties["statusMessage"])
        task_instance.xcom_push(key="log", value=job.properties["log"])
        for line in job.properties["log"]:
            logger.info(f"UNICORE LOGS: {line}")

        if job.status != uc_client.JobStatus.SUCCESSFUL:
            logger.error(f"Unicore job not successful. Job state is {job.status}. Aborting this task.")
            raise AirflowFailException

        work_dir = job.working_dir

        content = work_dir.contents()['content']
        task_instance.xcom_push(key="workdir_content", value=content)

        for filename in content.keys():
            if "/UNICORE_Job_" in filename:
                # the file name carries the job id after the 13-char "/UNICORE_Job_" prefix
                task_instance.xcom_push(key="Unicore Job ID", value=filename[13:])
                job_text = work_dir.stat(filename).raw().read().decode("utf-8")
                task_instance.xcom_push(key="UNICORE Job", value=job_text)
                continue
            if "bss_submit_" in filename:
                bss_submit_text = work_dir.stat(filename).raw().read().decode("utf-8")
                task_instance.xcom_push(key="BSS_SUBMIT", value=bss_submit_text)
                continue

        for file in self.xcom_output_files:
            try:
                logger.debug(f"Retreiving file {file} from workdir.")
                remote_file = work_dir.stat(file)
                file_content = remote_file.raw().read().decode("utf-8")
                task_instance.xcom_push(key=file, value=file_content)
            except HTTPError as http_error:
                # requested output file may legitimately be absent - log and keep going
                logger.error("Error while retreiving file %s from workdir.", file, exc_info=http_error)
                continue
            except UnicodeDecodeError as decode_error:
                logger.error("Error while decoding file %s.", file, exc_info=decode_error)

        exit_code = job.properties["exitCode"]
        return exit_code
|
|
276
|
+
|
|
277
|
+
class UnicoreScriptOperator(UnicoreGenericOperator):
    """
    Uploads a script to the unicore server and submits it as a job. Behaviour and parameters are otherwise the same as the UnicoreGenericOperator.
    """
    def __init__(self, name: str, script_content: str, **kwargs):
        """
        :param script_content: the content of the script to be executed as a string
        """
        super().__init__(name=name, executable=DEFAULT_SCRIPT_NAME, **kwargs)
        # stage the script in line-by-line under the name the executable refers to
        script_stagein = {
            "To": DEFAULT_SCRIPT_NAME,
            "Data": script_content.split('\n')
        }
        if self.imports is None:
            self.imports = [script_stagein]
        else:
            self.imports.append(script_stagein)
|
|
295
|
+
|
|
296
|
+
class UnicoreBSSOperator(UnicoreGenericOperator):
    """
    Uploads a BSS-script to the unicore server and submits it as a raw job. Behaviour and parameters are otherwise the same as the UnicoreGenericOperator.
    """
    def __init__(self, name: str, bss_file_content: str, **kwargs):
        """
        :param bss_file_content: the content of the BSS-script to be executed as a string
        """
        super().__init__(name=name, bss_file=DEFAULT_BSS_FILE, job_type="raw", xcom_output_files=[], **kwargs)
        # stage the batch script in line-by-line under the name referenced by bss_file
        bss_stagein = {
            "To": DEFAULT_BSS_FILE,
            "Data": bss_file_content.split('\n')
        }
        if self.imports is None:
            self.imports = [bss_stagein]
        else:
            self.imports.append(bss_stagein)
|
|
314
|
+
|
|
315
|
+
class UnicoreExecutableOperator(UnicoreGenericOperator):
    """A simplified operator for just executing an executable. Still supports all parameters of the UnicoreGenericOperator."""
    def __init__(self, name: str, executable: str, xcom_output_files : List[str] | None = None, **kwargs) -> None:
        """
        :param name: airflow task name and unicore job name.
        :param executable: the executable to run for this job.
        :param xcom_output_files: filenames to export via x_coms; defaults to stdout and stderr.
        """
        # avoid the shared mutable default-argument pitfall: build a fresh list per call
        if xcom_output_files is None:
            xcom_output_files = ["stdout", "stderr"]
        super().__init__(name=name, executable=executable, xcom_output_files=xcom_output_files, **kwargs)
|
|
319
|
+
|
|
320
|
+
class UnicoreDateOperator(UnicoreExecutableOperator):
    """Runs the 'date' executable as a unicore job. Mostly for testing purposes. Still supports all parameters of the UnicoreGenericOperator."""

    def __init__(self, name: str, **kwargs) -> None:
        """
        :param name: airflow task name and unicore job name.
        """
        super().__init__(name=name, executable='date', **kwargs)
|
|
324
|
+
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: airflow-unicore-integration
|
|
3
|
+
Version: 0.0.4
|
|
4
|
+
Summary: Running Unicore Jobs from airflow DAGs.
|
|
5
|
+
Author-email: Christian Böttcher <c.boettcher@fz-juelich.de>
|
|
6
|
+
Project-URL: Homepage, https://gitlab.jsc.fz-juelich.de/boettcher1/airflow_unicore_integration
|
|
7
|
+
Project-URL: Issues, https://gitlab.jsc.fz-juelich.de/boettcher1/airflow_unicore_integration/-/issues
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Framework :: Apache Airflow :: Provider
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: pyunicore>=1.0.0
|
|
16
|
+
Requires-Dist: apache-airflow==2.8.4
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
README.rst
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/airflow_unicore_integration/__init__.py
|
|
4
|
+
src/airflow_unicore_integration.egg-info/PKG-INFO
|
|
5
|
+
src/airflow_unicore_integration.egg-info/SOURCES.txt
|
|
6
|
+
src/airflow_unicore_integration.egg-info/dependency_links.txt
|
|
7
|
+
src/airflow_unicore_integration.egg-info/entry_points.txt
|
|
8
|
+
src/airflow_unicore_integration.egg-info/requires.txt
|
|
9
|
+
src/airflow_unicore_integration.egg-info/top_level.txt
|
|
10
|
+
src/airflow_unicore_integration/hooks/__init__.py
|
|
11
|
+
src/airflow_unicore_integration/hooks/unicore_hooks.py
|
|
12
|
+
src/airflow_unicore_integration/operators/__init__.py
|
|
13
|
+
src/airflow_unicore_integration/operators/unicore_operators.py
|
airflow_unicore_integration-0.0.4/src/airflow_unicore_integration.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
airflow_unicore_integration
|