airflow-unicore-integration 0.0.4__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow_unicore_integration-0.1.0/LICENSE +29 -0
- airflow_unicore_integration-0.1.0/PKG-INFO +169 -0
- {airflow_unicore_integration-0.0.4 → airflow_unicore_integration-0.1.0}/README.rst +19 -7
- airflow_unicore_integration-0.1.0/pyproject.toml +63 -0
- {airflow_unicore_integration-0.0.4 → airflow_unicore_integration-0.1.0}/src/airflow_unicore_integration/__init__.py +6 -4
- airflow_unicore_integration-0.1.0/src/airflow_unicore_integration/executors/run_task_via_supervisor.py +85 -0
- airflow_unicore_integration-0.1.0/src/airflow_unicore_integration/executors/unicore_executor.py +123 -0
- airflow_unicore_integration-0.1.0/src/airflow_unicore_integration/hooks/unicore_hooks.py +49 -0
- airflow_unicore_integration-0.1.0/src/airflow_unicore_integration/operators/__init__.py +0 -0
- {airflow_unicore_integration-0.0.4 → airflow_unicore_integration-0.1.0}/src/airflow_unicore_integration/operators/unicore_operators.py +145 -74
- airflow_unicore_integration-0.1.0/src/airflow_unicore_integration/policies/__init__.py +0 -0
- airflow_unicore_integration-0.1.0/src/airflow_unicore_integration/util/job.py +101 -0
- airflow_unicore_integration-0.1.0/src/airflow_unicore_integration.egg-info/PKG-INFO +169 -0
- {airflow_unicore_integration-0.0.4 → airflow_unicore_integration-0.1.0}/src/airflow_unicore_integration.egg-info/SOURCES.txt +7 -1
- {airflow_unicore_integration-0.0.4 → airflow_unicore_integration-0.1.0}/src/airflow_unicore_integration.egg-info/entry_points.txt +3 -0
- airflow_unicore_integration-0.1.0/src/airflow_unicore_integration.egg-info/requires.txt +2 -0
- airflow_unicore_integration-0.0.4/PKG-INFO +0 -16
- airflow_unicore_integration-0.0.4/pyproject.toml +0 -36
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration/hooks/unicore_hooks.py +0 -49
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration.egg-info/PKG-INFO +0 -16
- airflow_unicore_integration-0.0.4/src/airflow_unicore_integration.egg-info/requires.txt +0 -2
- {airflow_unicore_integration-0.0.4 → airflow_unicore_integration-0.1.0}/setup.cfg +0 -0
- {airflow_unicore_integration-0.0.4/src/airflow_unicore_integration/hooks → airflow_unicore_integration-0.1.0/src/airflow_unicore_integration/executors}/__init__.py +0 -0
- {airflow_unicore_integration-0.0.4/src/airflow_unicore_integration/operators → airflow_unicore_integration-0.1.0/src/airflow_unicore_integration/hooks}/__init__.py +0 -0
- {airflow_unicore_integration-0.0.4 → airflow_unicore_integration-0.1.0}/src/airflow_unicore_integration.egg-info/dependency_links.txt +0 -0
- {airflow_unicore_integration-0.0.4 → airflow_unicore_integration-0.1.0}/src/airflow_unicore_integration.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) Forschungszentrum Juelich GmbH
|
|
4
|
+
All rights reserved.
|
|
5
|
+
|
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
|
8
|
+
|
|
9
|
+
* Redistributions of source code must retain the above copyright notice, this
|
|
10
|
+
list of conditions and the following disclaimer.
|
|
11
|
+
|
|
12
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
and/or other materials provided with the distribution.
|
|
15
|
+
|
|
16
|
+
* Neither the names of the copyright holders nor the names of its
|
|
17
|
+
contributors may be used to endorse or promote products derived from
|
|
18
|
+
this software without specific prior written permission.
|
|
19
|
+
|
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
21
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
22
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
23
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
24
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
25
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
26
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
27
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
28
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
29
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: airflow-unicore-integration
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Running Unicore Jobs from airflow DAGs.
|
|
5
|
+
Author-email: Christian Böttcher <c.boettcher@fz-juelich.de>
|
|
6
|
+
License-Expression: BSD-3-Clause
|
|
7
|
+
Project-URL: Homepage, https://github.com/UNICORE-EU/airflow-unicore-integration
|
|
8
|
+
Project-URL: Issues, https://github.com/UNICORE-EU/airflow-unicore-integration/issues
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Framework :: Apache Airflow :: Provider
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Description-Content-Type: text/x-rst
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: pyunicore>=1.0.0
|
|
17
|
+
Requires-Dist: apache-airflow>=3.0.0
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
===========================
|
|
21
|
+
Unicore Airflow Integration
|
|
22
|
+
===========================
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|Generic badge|
|
|
26
|
+
|
|
27
|
+
.. |Generic badge| image:: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml/badge.svg
|
|
28
|
+
:target: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml
|
|
29
|
+
|
|
30
|
+
This project integrates `UNICORE <https://github.com/UNICORE-EU>`_ and `Apache Airflow <https://airflow.apache.org/>`_.
|
|
31
|
+
UNICORE is a software suite that, among other functions, provides seamless access to high-performance compute and data resources.
|
|
32
|
+
Airflow is a platform to programmatically author, schedule and monitor workflows.
|
|
33
|
+
|
|
34
|
+
In the current state, this project provides a set of airflow `operators <https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/operators.html>`_, which can be used as part of airflow workflows to submit jobs to Unicore.
|
|
35
|
+
The UnicoreExecutor only offers experimental support for airflow 3 so far. Further support is currently being worked on.
|
|
36
|
+
|
|
37
|
+
---------------------------
|
|
38
|
+
Using the Unicore Operators
|
|
39
|
+
---------------------------
|
|
40
|
+
|
|
41
|
+
There are multiple Unicore operators provided by this package. The most versatile one is the ``UnicoreGenericOperator``, which supports a lot of job parameters.
|
|
42
|
+
All other operators are intended to offer a slightly less complex constructor, and therefore simpler usage, but all generic parameters are still available to be used.
|
|
43
|
+
|
|
44
|
+
All operators support all possible parameters of the `Unicore job description <https://unicore-docs.readthedocs.io/en/latest/user-docs/rest-api/job-description/index.html#overview>`_. Here is an excerpt containing some commonly used parameters:
|
|
45
|
+
|
|
46
|
+
======================= ======================= =========================================== ====================
|
|
47
|
+
parameter name type default description
|
|
48
|
+
======================= ======================= =========================================== ====================
|
|
49
|
+
application_name str None Application Name
|
|
50
|
+
application_version str None Application Version
|
|
51
|
+
executable str None Command line executable
|
|
52
|
+
arguments List(str) None Command line arguments
|
|
53
|
+
environment Map(str,str) None environment arguments
|
|
54
|
+
parameters Map None Application Parameters
|
|
55
|
+
project str None Accounting Project
|
|
56
|
+
imports List(imports) None Stage-in/data import - see Unicore docs
|
|
57
|
+
exports List(exports) None Stage-out/data export - see Unicore docs
|
|
58
|
+
======================= ======================= =========================================== ====================
|
|
59
|
+
|
|
60
|
+
For imports and exports go `here <https://unicore-docs.readthedocs.io/en/latest/user-docs/rest-api/job-description/index.html#importing-files-into-the-job-workspace>`_ for details.
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
The ``UnicoreGenericOperator`` supports the following additional parameters:
|
|
64
|
+
|
|
65
|
+
======================= ======================= =========================================== ====================
|
|
66
|
+
parameter name type default description
|
|
67
|
+
======================= ======================= =========================================== ====================
|
|
68
|
+
name str None name for the airflow task and the Unicore job
|
|
69
|
+
xcom_output_files List(str) ["stdout","stderr"] list of files of which the content should be put into xcoms
|
|
70
|
+
base_url str configured in airflow connections or None The base URL of the UNICOREX server to be used for the Unicore client
|
|
71
|
+
credential pyunicore credential configured in airflow connections or None A Unicore Credential to be used for the Unicore client
|
|
72
|
+
credential_username str configured in airflow connections or None Username for the Unicore client credentials
|
|
73
|
+
credential_password str configured in airflow connections or None Password for the Unicore client credentials
|
|
74
|
+
credential_token str configured in airflow connections or None An OIDC token to be used by the Unicore client
|
|
75
|
+
======================= ======================= =========================================== ====================
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
The ``UnicoreScriptOperator`` offers a way to more easily submit a script as a job, where the script content can be provided as a string.
|
|
79
|
+
|
|
80
|
+
======================= ======================= =========================================== ====================
|
|
81
|
+
parameter name type default description
|
|
82
|
+
======================= ======================= =========================================== ====================
|
|
83
|
+
script_content str None The content of the script file
|
|
84
|
+
======================= ======================= =========================================== ====================
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
The ``UnicoreBSSOperator`` offers a way to directly submit batch-scripts from their content-strings.
|
|
88
|
+
|
|
89
|
+
======================= ======================= =========================================== ====================
|
|
90
|
+
parameter name type default description
|
|
91
|
+
======================= ======================= =========================================== ====================
|
|
92
|
+
bss_file_content str None The content of the batch script file
|
|
93
|
+
======================= ======================= =========================================== ====================
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
The ``UnicoreExecutableOperator`` offers a reduced constructor that only requires an executable.
|
|
97
|
+
|
|
98
|
+
======================= ======================= =========================================== ====================
|
|
99
|
+
parameter name type default description
|
|
100
|
+
======================= ======================= =========================================== ====================
|
|
101
|
+
executable str None The executable to run for this job
|
|
102
|
+
xcom_output_files List(str) ["stdout","stderr"] list of files of which the content should be put into xcoms
|
|
103
|
+
======================= ======================= =========================================== ====================
|
|
104
|
+
|
|
105
|
+
The ``UnicoreDateOperator`` is more of a testing operator, since it will only run the ``date`` executable.
|
|
106
|
+
|
|
107
|
+
-------------------------------
|
|
108
|
+
Behaviour on Errors and Success
|
|
109
|
+
-------------------------------
|
|
110
|
+
|
|
111
|
+
The Unicore Operators do not do a lot of error and exception handling, and mostly just forward any problems to be handled by airflow.
|
|
112
|
+
All of the Unicore logic is handled by the `pyunicore library <https://github.com/HumanBrainProject/pyunicore>`_.
|
|
113
|
+
|
|
114
|
+
While some validation of the resulting Unicore job description is done automatically, it may still be possible to build an invalid job description with the operators.
|
|
115
|
+
This may lead to a submission failure with Unicore. In this case, an exception is thrown to be handled by airflow.
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
For a successful job submission, the job exit code is returned as the task return value, so that airflow can handle non-zero exit codes.
|
|
119
|
+
All operators will also append the content of the job-log-file from Unicore to the airflow task log.
|
|
120
|
+
Also, some job results and values will be uploaded via airflow-x-coms as well:
|
|
121
|
+
|
|
122
|
+
======================= ========================================
|
|
123
|
+
xcom name description
|
|
124
|
+
======================= ========================================
|
|
125
|
+
Unicore Job ID the Unicore ID for the job
|
|
126
|
+
Unicore Job the TSI script that was submitted by Unicore
|
|
127
|
+
BSS_SUBMIT the bss_script submitted by Unicore
|
|
128
|
+
status_message the status message for the Unicore job
|
|
129
|
+
log the Unicore job log
|
|
130
|
+
workdir_content content of the job workdir upon completion
|
|
131
|
+
[xcom_output_files] content of each file in their own xcom, by default stdout and stderr
|
|
132
|
+
======================= ========================================
|
|
133
|
+
|
|
134
|
+
------------
|
|
135
|
+
Example DAGs
|
|
136
|
+
------------
|
|
137
|
+
|
|
138
|
+
There are some example DAGs in this repository under ``project-dir/dags``.
|
|
139
|
+
|
|
140
|
+
- ``unicore-test-1.py`` just shows basic date and executable usage.
|
|
141
|
+
- ``unicore-test-2.py`` has some basic examples for the generic operator.
|
|
142
|
+
- ``unicore-test-3.py`` also includes script-operator examples.
|
|
143
|
+
- ``unicore-test-4.py`` has some examples with more arguments.
|
|
144
|
+
- ``unicore-test-bss.py`` shows how bss submission can be done (very simple example).
|
|
145
|
+
- ``unicore-test-credentials.py`` demonstrates that not only the credentials from the airflow connections backend can be used, but they can also be provided in the constructor of the operator.
|
|
146
|
+
- ``unicore-test-import-export.py`` gives short examples for the imports and exports usage.
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
-----------------
|
|
150
|
+
Setup testing env
|
|
151
|
+
-----------------
|
|
152
|
+
|
|
153
|
+
Ensure a current version of docker is installed.
|
|
154
|
+
|
|
155
|
+
Run ``python3 -m build`` to build the python package.
|
|
156
|
+
|
|
157
|
+
Run the ``testing-env/build-image.sh`` script to create the customized airflow image, which will contain the newly built python package.
|
|
158
|
+
|
|
159
|
+
Run ``testing-env/run-testing-env.sh init`` to initialize the airflow containers, database etc. This only needs to be done once.
|
|
160
|
+
|
|
161
|
+
Run ``testing-env/run-testing-env.sh up`` to start the local airflow and Unicore deployment. Airflow will be available on port 8080, Unicore on port 8081.
|
|
162
|
+
|
|
163
|
+
The ``run-testing-env.sh`` script supports the commands up, down, start, stop, ps and init for matching docker compose functions.
|
|
164
|
+
|
|
165
|
+
-----------------------
|
|
166
|
+
Install package via pip
|
|
167
|
+
-----------------------
|
|
168
|
+
|
|
169
|
+
``pip install airflow-unicore-integration``
|
|
@@ -3,6 +3,18 @@ Unicore Airflow Integration
|
|
|
3
3
|
===========================
|
|
4
4
|
|
|
5
5
|
|
|
6
|
+
|Generic badge|
|
|
7
|
+
|
|
8
|
+
.. |Generic badge| image:: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml/badge.svg
|
|
9
|
+
:target: https://github.com/UNICORE-EU/airflow-unicore-integration/actions/workflows/publish-to-pypi.yml
|
|
10
|
+
|
|
11
|
+
This project integrates `UNICORE <https://github.com/UNICORE-EU>`_ and `Apache Airflow <https://airflow.apache.org/>`_.
|
|
12
|
+
UNICORE is a software suite that, among other functions, provides seamless access to high-performance compute and data resources.
|
|
13
|
+
Airflow is a platform to programmatically author, schedule and monitor workflows.
|
|
14
|
+
|
|
15
|
+
In the current state, this project provides a set of airflow `operators <https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/operators.html>`_, which can be used as part of airflow workflows to submit jobs to Unicore.
|
|
16
|
+
The UnicoreExecutor only offers experimental support for airflow 3 so far. Further support is currently being worked on.
|
|
17
|
+
|
|
6
18
|
---------------------------
|
|
7
19
|
Using the Unicore Operators
|
|
8
20
|
---------------------------
|
|
@@ -29,7 +41,7 @@ exports List(exports) None
|
|
|
29
41
|
For imports and exports go `here <https://unicore-docs.readthedocs.io/en/latest/user-docs/rest-api/job-description/index.html#importing-files-into-the-job-workspace>`_ for details.
|
|
30
42
|
|
|
31
43
|
|
|
32
|
-
The ``UnicoreGenericOperator`` supports the following additional parameters:
|
|
44
|
+
The ``UnicoreGenericOperator`` supports the following additional parameters:
|
|
33
45
|
|
|
34
46
|
======================= ======================= =========================================== ====================
|
|
35
47
|
parameter name type default description
|
|
@@ -80,7 +92,7 @@ Behaviour on Errors and Success
|
|
|
80
92
|
The Unicore Operators do not do a lot of error and exception handling, and mostly just forward any problems to be handled by airflow.
|
|
81
93
|
All of the Unicore logic is handled by the `pyunicore library <https://github.com/HumanBrainProject/pyunicore>`_.
|
|
82
94
|
|
|
83
|
-
While some validation of the resulting Unicore job description is done automatically, it may still be possible to build an invalid job description with the operators.
|
|
95
|
+
While some validation of the resulting Unicore job description is done automatically, it may still be possible to build an invalid job description with the operators.
|
|
84
96
|
This may lead to a submission failure with Unicore. In this case, an exception is thrown to be handled by airflow.
|
|
85
97
|
|
|
86
98
|
|
|
@@ -110,10 +122,10 @@ There are some example DAGs in this repository under ``project-dir/dags``.
|
|
|
110
122
|
- ``unicore-test-2.py`` has some basic examples for the generic operator.
|
|
111
123
|
- ``unicore-test-3.py`` also includes script-operator examples.
|
|
112
124
|
- ``unicore-test-4.py`` has some examples with more arguments.
|
|
113
|
-
- ``unicore-test-bss.py`` shows how bss submission can be done (very simple example).
|
|
114
|
-
- ``unicore-test-credentials.py`` demonstrates that not only the credentials from the airflow connections backend can be used, but they can also be provided in the constructor of the
|
|
115
|
-
- ``unicore-test-import-export.py`` gives
|
|
116
|
-
|
|
125
|
+
- ``unicore-test-bss.py`` shows how bss submission can be done (very simple example).
|
|
126
|
+
- ``unicore-test-credentials.py`` demonstrates that not only the credentials from the airflow connections backend can be used, but they can also be provided in the constructor of the operator.
|
|
127
|
+
- ``unicore-test-import-export.py`` gives short examples for the imports and exports usage.
|
|
128
|
+
|
|
117
129
|
|
|
118
130
|
-----------------
|
|
119
131
|
Setup testing env
|
|
@@ -135,4 +147,4 @@ The ``run-testing-env.sh`` script supports the commands up, down, start, stop, p
|
|
|
135
147
|
Install package via pip
|
|
136
148
|
-----------------------
|
|
137
149
|
|
|
138
|
-
``pip install airflow-unicore-integration
|
|
150
|
+
``pip install airflow-unicore-integration``
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = [
|
|
3
|
+
"setuptools>=61.0",
|
|
4
|
+
"wheel"
|
|
5
|
+
]
|
|
6
|
+
build-backend = "setuptools.build_meta"
|
|
7
|
+
|
|
8
|
+
[project]
|
|
9
|
+
name = "airflow-unicore-integration"
|
|
10
|
+
version = "0.1.0"
|
|
11
|
+
authors = [
|
|
12
|
+
{ name="Christian Böttcher", email="c.boettcher@fz-juelich.de" },
|
|
13
|
+
]
|
|
14
|
+
description = "Running Unicore Jobs from airflow DAGs."
|
|
15
|
+
readme = "README.rst"
|
|
16
|
+
requires-python = ">=3.9"
|
|
17
|
+
license = "BSD-3-Clause"
|
|
18
|
+
classifiers = [
|
|
19
|
+
"Development Status :: 4 - Beta",
|
|
20
|
+
"Framework :: Apache Airflow :: Provider",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Operating System :: OS Independent",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
dependencies = [
|
|
28
|
+
"pyunicore>=1.0.0",
|
|
29
|
+
"apache-airflow>=3.0.0"
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/UNICORE-EU/airflow-unicore-integration"
|
|
34
|
+
Issues = "https://github.com/UNICORE-EU/airflow-unicore-integration/issues"
|
|
35
|
+
|
|
36
|
+
[project.entry-points."apache_airflow_provider"]
|
|
37
|
+
provider_info = "airflow_unicore_integration:get_provider_info"
|
|
38
|
+
|
|
39
|
+
[project.entry-points.'airflow.policy']
|
|
40
|
+
_ = 'airflow_unicore_integration.policies'
|
|
41
|
+
|
|
42
|
+
[tool.pytest.ini_options]
|
|
43
|
+
pythonpath = [
|
|
44
|
+
"src"
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
[tool.black]
|
|
48
|
+
line-length = 100
|
|
49
|
+
|
|
50
|
+
[tool.flake8]
|
|
51
|
+
max-line-length = 100
|
|
52
|
+
ignore = [
|
|
53
|
+
"N999",
|
|
54
|
+
"E501",
|
|
55
|
+
"W503"
|
|
56
|
+
]
|
|
57
|
+
per-file-ignores = [
|
|
58
|
+
"__init__.py:F401,E501",
|
|
59
|
+
"_version.py:E203",
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
[tool.mypy]
|
|
63
|
+
ignore_missing_imports=true
|
|
@@ -3,8 +3,10 @@ def get_provider_info():
|
|
|
3
3
|
"package-name": "airflow-unicore-integration",
|
|
4
4
|
"name": "Unicore",
|
|
5
5
|
"description": "Apache Airflow Unicore provider containing Operators and hooks.",
|
|
6
|
-
"connection-types": [
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
6
|
+
"connection-types": [
|
|
7
|
+
{
|
|
8
|
+
"connection-type": "unicore",
|
|
9
|
+
"hook-class-name": "airflow_unicore_integration.hooks.unicore_hooks.UnicoreHook",
|
|
10
|
+
}
|
|
11
|
+
],
|
|
10
12
|
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Usage:
|
|
3
|
+
|
|
4
|
+
python run_task_via_supervisor.py [--json-string <workload string> | --json-path <workload filepath>]
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import argparse
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
import structlog
|
|
12
|
+
from airflow.configuration import conf
|
|
13
|
+
from airflow.executors import workloads
|
|
14
|
+
from airflow.sdk.execution_time.supervisor import supervise
|
|
15
|
+
from pydantic import TypeAdapter
|
|
16
|
+
from pydantic_core._pydantic_core import ValidationError
|
|
17
|
+
|
|
18
|
+
log = structlog.get_logger(logger_name=__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def execute_workload_locally(workload: workloads.All):
    """Hand a single ``ExecuteTask`` workload to the task SDK supervisor.

    The execution API server is taken from ``[core] execution_api_server_url``. If that
    option is not set, it falls back to ``[api] base_url`` (default ``/``) with any trailing
    slashes removed and ``/execution/`` appended.

    :param workload: the decoded workload; only ``workloads.ExecuteTask`` is supported
    :raises ValueError: if ``workload`` is not a ``workloads.ExecuteTask``
    """
    if not isinstance(workload, workloads.ExecuteTask):
        raise ValueError(f"Executor does not know how to handle {type(workload)}")

    api_base_url = conf.get("api", "base_url", fallback="/")
    fallback_server = f"{api_base_url.rstrip('/')}/execution/"
    server = conf.get("core", "execution_api_server_url", fallback=fallback_server)
    log.info("Connecting to server:", server=server)

    supervise(
        # workload.ti is not the type supervise() is annotated with, but it duck types the
        # same. TODO: Create a protocol for this.
        ti=workload.ti,  # type: ignore[arg-type]
        dag_rel_path=workload.dag_rel_path,
        bundle_info=workload.bundle_info,
        token=workload.token,
        server=server,
        log_path=workload.log_path,
        # Mirror the task output to stdout as well, so the process logs can be read from
        # wherever this script's stdout is collected.
        subprocess_logs_to_stdout=True,
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def main():
    """Parse the command line, decode the workload and execute it.

    Exactly one of ``--json-path`` (file containing the workload JSON) or ``--json-string``
    (the workload JSON itself) must be given; argparse enforces this through a required
    mutually exclusive group.

    The process exits with status 1, after logging the error, when the file cannot be read
    or when the payload does not validate as a workload.
    """
    parser = argparse.ArgumentParser(
        description="Execute a workload in a Containerised executor using the task SDK."
    )

    # Create a mutually exclusive group to ensure that only one of the flags is set
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--json-path",
        help="Path to the input JSON file containing the execution workload payload.",
        type=str,
    )
    group.add_argument(
        "--json-string",
        help="The JSON string itself containing the execution workload payload.",
        type=str,
    )
    args = parser.parse_args()

    decoder = TypeAdapter[workloads.All](workloads.All)

    # Select the branch with "is not None" rather than truthiness: an empty argument value
    # would otherwise match neither branch and leave ``workload`` unbound further down.
    if args.json_path is not None:
        try:
            with open(args.json_path) as file:
                input_data = file.read()
        except OSError as e:
            log.error("Failed to read file", error=str(e))
            sys.exit(1)
        parse_error_message = "Failed to parse input JSON file"
    else:
        input_data = args.json_string
        parse_error_message = "Failed to parse input JSON string"

    # Validate in one place so that an invalid payload is reported the same way for both
    # input sources (previously a bad file payload raised an unhandled ValidationError).
    try:
        workload = decoder.validate_json(input_data)
    except ValidationError as e:
        log.error(parse_error_message, error=str(e))
        sys.exit(1)

    execute_workload_locally(workload)


if __name__ == "__main__":
    main()
|
airflow_unicore_integration-0.1.0/src/airflow_unicore_integration/executors/unicore_executor.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""
|
|
2
|
+
to configure for executor:
|
|
3
|
+
- Connection details for unicore: conn_id AIRFLOW__UNICORE_EXECUTOR__UNICORE_CONN_ID | should be defined, can be skipped if every task provides one
|
|
4
|
+
- location (path) of python virtualenv prepared on hpc system | AIRFLOW__UNICORE_EXECUTOR__DEFAULT_ENV | should be defined, can be skipped if every task provides one
|
|
5
|
+
|
|
6
|
+
tasks should be allowed to overwrite SITE, CREDENTIALS_*, UNICORE_CONN_ID and DEFAULT_ENV - i.e. everything but the database connection - credentials should be given as a uc_credential object via executor_config
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import time
|
|
11
|
+
from typing import Any
|
|
12
|
+
from typing import Dict
|
|
13
|
+
from typing import List
|
|
14
|
+
|
|
15
|
+
import pyunicore.client as uc_client
|
|
16
|
+
from airflow.configuration import conf
|
|
17
|
+
from airflow.executors.base_executor import BaseExecutor
|
|
18
|
+
from airflow.executors.workloads import All
|
|
19
|
+
from airflow.executors.workloads import ExecuteTask
|
|
20
|
+
from airflow.models.taskinstancekey import TaskInstanceKey
|
|
21
|
+
from airflow.utils.state import TaskInstanceState
|
|
22
|
+
|
|
23
|
+
from airflow_unicore_integration.hooks import unicore_hooks
|
|
24
|
+
|
|
25
|
+
from ..util.job import JobDescriptionGenerator
|
|
26
|
+
from ..util.job import NaiveJobDescriptionGenerator
|
|
27
|
+
|
|
28
|
+
# Translation table from the status reported by a UNICORE job to the Airflow task-instance
# state that the executor reports for the corresponding task.
STATE_MAPPINGS: Dict[uc_client.JobStatus, TaskInstanceState] = {
    # a job whose status is undefined is treated as failed
    uc_client.JobStatus.UNDEFINED: TaskInstanceState.FAILED,
    # everything before the job actually runs counts as queued
    uc_client.JobStatus.READY: TaskInstanceState.QUEUED,
    uc_client.JobStatus.STAGINGIN: TaskInstanceState.QUEUED,
    uc_client.JobStatus.QUEUED: TaskInstanceState.QUEUED,
    # stage-out is still reported as running
    uc_client.JobStatus.RUNNING: TaskInstanceState.RUNNING,
    uc_client.JobStatus.STAGINGOUT: TaskInstanceState.RUNNING,
    # final states
    uc_client.JobStatus.SUCCESSFUL: TaskInstanceState.SUCCESS,
    uc_client.JobStatus.FAILED: TaskInstanceState.FAILED,
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class UnicoreExecutor(BaseExecutor):
    """Executor that submits each queued task as a job through a UNICORE client.

    Submitted jobs are kept in ``active_jobs`` (keyed by task instance key). :meth:`sync`
    polls their status, maps it through ``STATE_MAPPINGS`` and reports the result to the
    base executor; jobs that reached a final state are dropped from ``active_jobs``.
    """

    def start(self):
        """Create the job registry, the UNICORE client and the job description generator."""
        # Jobs that were submitted and have not yet been seen in a final state.
        self.active_jobs: Dict[TaskInstanceKey, uc_client.Job] = {}
        # Client created from the hook's default connection id.
        self.uc_conn = unicore_hooks.UnicoreHook().get_conn()
        # TODO get job description generator class and init params from config
        self.job_descr_generator: JobDescriptionGenerator = NaiveJobDescriptionGenerator()

    def sync(self) -> None:
        """Update the state of every active task from the status of its UNICORE job."""
        # Iterate over a snapshot, because finished jobs are removed inside the loop.
        for task, job in list(self.active_jobs.items()):
            state = STATE_MAPPINGS[job.status]
            if state in (TaskInstanceState.FAILED, TaskInstanceState.SUCCESS):
                if state == TaskInstanceState.FAILED:
                    self.fail(task)
                else:
                    self.success(task)
                self._forward_unicore_log(task, job)
                self.active_jobs.pop(task)
            elif state == TaskInstanceState.RUNNING:
                self.running_state(task, state)
            # QUEUED: nothing to report, the job is looked at again on the next sync

        return super().sync()

    def _forward_unicore_log(self, task: TaskInstanceKey, job: uc_client.Job) -> List[str]:
        """Return the UNICORE log lines for ``job``; currently always an empty list."""
        # TODO retrieve unicore logs from job directory and return
        return []

    def _get_unicore_client(self, executor_config: "dict | None" = None):
        """Return the UNICORE client to use for a task.

        :param executor_config: the task's executor config; currently ignored
        :return: the client created in :meth:`start`
        """
        # The annotation is a string because this module does not enable postponed
        # evaluation of annotations and ``dict | None`` cannot be evaluated on Python 3.9.
        #
        # TODO fix this only temporary solution: every task uses the executor-wide client.
        # Planned behaviour (not implemented yet, the config keys are not defined anywhere):
        #   - the task may name a different unicore connection id in executor_config, else
        #     the airflow-wide default conf.get("unicore.executor", "UNICORE_CONN_ID") is used
        #   - the task may provide a different site to run at, else the default from the
        #     connection is used
        #   - the task may provide a different credential, else the default from the
        #     connection is used
        #   - the client is then built with
        #     UnicoreHook(uc_conn_id=...).get_conn(overwrite_base_url=..., overwrite_credential=...)
        return self.uc_conn

    def _submit_job(self, workload: ExecuteTask):
        """Create a job description for ``workload``, submit it and register the job.

        :param workload: the task workload to run
        :return: the job object returned by the client's ``new_job``
        """
        # Named ``client`` so the module-level ``uc_client`` import is not shadowed.
        client = self._get_unicore_client(executor_config=workload.ti.executor_config)
        job_descr = self._create_job_description(workload)
        self.log.info("Generated job description")
        self.log.debug(str(job_descr))
        job = client.new_job(job_descr)
        self.log.info("Submitted unicore job")
        # Register the job so that sync() keeps track of its state.
        self.active_jobs[workload.ti.key] = job
        return job

    def _create_job_description(self, workload: ExecuteTask) -> Dict[str, Any]:
        """Delegate building the job description to the configured generator."""
        return self.job_descr_generator.create_job_description(workload)

    def queue_workload(self, workload: "ExecuteTask | All", session):
        """Submit ``workload`` to UNICORE right away.

        :param workload: the workload to run; only ``ExecuteTask`` is supported
        :param session: not used by this executor
        :raises TypeError: if ``workload`` is not an ``ExecuteTask``
        """
        if not isinstance(workload, ExecuteTask):
            raise TypeError(f"Don't know how to queue workload of type {type(workload).__name__}")

        # _submit_job() also adds the job to active_jobs for task state management
        self._submit_job(workload)

    def end(self, heartbeat_interval=10) -> None:
        """Block until no active jobs are left.

        :param heartbeat_interval: seconds to sleep between two status polls
        """
        # wait for current jobs to finish, dont start any new ones
        while True:
            self.sync()
            if not self.active_jobs:
                break
            time.sleep(heartbeat_interval)

    def terminate(self):
        """Abort all active jobs, then wait in :meth:`end` until none are left."""
        for job in list(self.active_jobs.values()):
            job.abort()
        self.end()
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from airflow.hooks.base import BaseHook
|
|
4
|
+
from pyunicore import client
|
|
5
|
+
from pyunicore import credentials
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class UnicoreHook(BaseHook):
    """
    Interact with Unicore.

    Creates Unicore Clients from airflow connections.

    :param uc_conn_id: The unicore connection id - default: uc_default
    """

    conn_name_attr = "uc_conn_id"
    default_conn_name = "uc_default"
    conn_type = "unicore"
    hook_name = "Unicore"

    def __init__(self, uc_conn_id: str = default_conn_name) -> None:
        super().__init__()
        self.uc_conn_id = uc_conn_id

    def get_conn(
        self,
        overwrite_base_url: str | None = None,
        overwrite_credential: credentials.Credential | None = None,
    ) -> client.Client:
        """Return a Unicore Client. base_url and credentials may be overwritten.

        :param overwrite_base_url: used instead of the connection's host when not None
        :param overwrite_credential: used instead of a username/password credential built
            from the connection's login and password when not None
        :return: a client for the resulting base URL and credential
        """
        self.log.debug(
            f"Gettig connection with id '{self.uc_conn_id}' from secrets backend. Will be modified with user input for UNICORE."
        )
        params = self.get_connection(self.uc_conn_id)

        base_url = params.host if overwrite_base_url is None else overwrite_base_url
        # Only build the username/password credential when it is actually going to be used.
        if overwrite_credential is None:
            credential = credentials.UsernamePassword(params.login, params.password)
        else:
            credential = overwrite_credential

        return client.Client(credential, base_url)

    def test_connection(self) -> tuple[bool, str]:
        """Test the connection by sending an access_info request.

        :return: ``(True, message)`` if the request went through, otherwise ``(False, error)``
            where ``error`` is the text of the exception that was raised
        """
        try:
            self.get_conn().access_info()
        except Exception as e:
            # Deliberately broad: any failure (missing connection, bad URL, rejected
            # credentials, ...) is reported as a failed test instead of being raised.
            return False, str(e)
        return True, "Connection successfully tested"
|
|
File without changes
|