airflow-unicore-integration 0.1.8__tar.gz → 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airflow_unicore_integration-0.1.8/src/airflow_unicore_integration.egg-info → airflow_unicore_integration-0.1.10}/PKG-INFO +1 -1
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/pyproject.toml +1 -1
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/util/job.py +87 -23
- airflow_unicore_integration-0.1.10/src/airflow_unicore_integration/util/launch_script_content.py +90 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10/src/airflow_unicore_integration.egg-info}/PKG-INFO +1 -1
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration.egg-info/SOURCES.txt +2 -1
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/LICENSE +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/README.rst +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/setup.cfg +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/__init__.py +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/executors/__init__.py +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/executors/run_task_via_supervisor.py +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/executors/unicore_executor.py +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/hooks/__init__.py +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/hooks/unicore_hooks.py +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/operators/__init__.py +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/operators/unicore_operators.py +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/policies/__init__.py +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration.egg-info/dependency_links.txt +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration.egg-info/entry_points.txt +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration.egg-info/requires.txt +0 -0
- {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration.egg-info/top_level.txt +0 -0
{airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/util/job.py

```diff
@@ -1,10 +1,16 @@
-import
+import json
+import logging
 from typing import Any
 from typing import Dict

 from airflow.configuration import conf
 from airflow.executors.workloads import ExecuteTask
 from airflow.models.taskinstancekey import TaskInstanceKey
+from airflow.providers.git.hooks.git import GitHook
+
+from .launch_script_content import LAUNCH_SCRIPT_CONTENT_STR
+
+logger = logging.getLogger(__name__)


 class JobDescriptionGenerator:
```
```diff
@@ -18,6 +24,7 @@ class JobDescriptionGenerator:
     EXECUTOR_CONFIG_PARAMETERS = "Parameters"  # gets added to the unicore job description
     EXECUTOR_CONFIG_PROJECT = "Project"  # gets added to the unicore job description
     EXECUTOR_CONFIG_PRE_COMMANDS = "precommands"  # gets added to the unicore job description
+    EXECUTOR_CONFIG_POST_COMMANDS = "postcommands"  # gets added to the unicore job description
     EXECUTOR_CONFIG_UNICORE_CONN_KEY = (
         "unicore_connection_id"  # alternative connection id for the Unicore connection to use
     )
```
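These `executor_config` keys are read per task. Below is a minimal sketch of how a DAG author might set them; only the key strings come from the constants above, while the operator and all values are invented for illustration:

```python
# Hypothetical usage sketch: only the executor_config keys come from the
# constants above; the operator and all values are invented.
from airflow.providers.standard.operators.bash import BashOperator

task = BashOperator(
    task_id="hello_unicore",
    bash_command="echo hello",
    executor_config={
        "Project": "demo-project",                   # EXECUTOR_CONFIG_PROJECT
        "precommands": ["module load Python"],       # EXECUTOR_CONFIG_PRE_COMMANDS
        "postcommands": ["echo done"],               # EXECUTOR_CONFIG_POST_COMMANDS
        "unicore_connection_id": "my_unicore_conn",  # EXECUTOR_CONFIG_UNICORE_CONN_KEY
    },
)
```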
```diff
@@ -27,12 +34,17 @@ class JobDescriptionGenerator:
     def create_job_description(self, workload: ExecuteTask) -> Dict[str, Any]:
         raise NotImplementedError()

+    def get_job_name(self, key: TaskInstanceKey) -> str:
+        return f"{key.dag_id} - {key.task_id} - {key.run_id} - {key.try_number}"
+

 class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
     """
-    This class generates a naive unicore job, that expects there to be a working python env
+    This class generates a naive unicore job, that expects there to be a working python env containing airflow and any other required dependencies on the executing system.
     """

+    GIT_DAG_BUNDLE_CLASSPATH = "airflow.providers.git.bundles.git.GitDagBundle"
+
     def create_job_description(self, workload: ExecuteTask) -> Dict[str, Any]:
         key: TaskInstanceKey = workload.ti.key
         executor_config = workload.ti.executor_config
```
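The new `get_job_name` helper derives the UNICORE job name from the task instance key. A quick illustration with made-up values:

```python
# Illustration with made-up values; shows the format produced by get_job_name().
from airflow.models.taskinstancekey import TaskInstanceKey

key = TaskInstanceKey(
    dag_id="example_dag",
    task_id="example_task",
    run_id="manual__2025-01-01T00:00:00",
    try_number=1,
)
print(f"{key.dag_id} - {key.task_id} - {key.run_id} - {key.try_number}")
# example_dag - example_task - manual__2025-01-01T00:00:00 - 1
```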
```diff
@@ -46,6 +58,7 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
         user_added_resources: Dict[str, str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_RESOURCES, None)  # type: ignore
         user_added_pre_commands: list[str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_PRE_COMMANDS, [])  # type: ignore
         user_defined_python_env: str = workload.ti.executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_PYTHON_ENV_KEY, None)  # type: ignore
+        user_added_post_commands: list[str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_POST_COMMANDS, [])  # type: ignore
         # get local dag path from cmd and fix dag path in arguments
         dag_rel_path = str(workload.dag_rel_path)
         if dag_rel_path.startswith("DAG_FOLDER"):
```
```diff
@@ -56,56 +69,107 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
         server = conf.get(
             "unicore.executor", "execution_api_server_url", fallback=default_execution_api_server
         )
+        logger.debug(f"Server is {server}")

         # check which python virtualenv to use
         if user_defined_python_env:
             python_env = user_defined_python_env
         else:
             python_env = conf.get("unicore.executor", "DEFAULT_ENV")
+        tmp_dir = conf.get("unicore.executor", "TMP_DIR", "/tmp")
         # prepare dag file to be uploaded via unicore
         # dag_file = open("/tmp/test")
         # dag_content = dag_file.readlines()
         # dag_import = {"To": dag_rel_path, "Data": dag_content}
         worker_script_import = {
             "To": "run_task_via_supervisor.py",
-            "From": "https://gist.githubusercontent.com/cboettcher/3f1101a1d1b67e7944d17c02ecd69930/raw/1d90bf38199d8c0adf47a79c8840c3e3ddf57462/run_task_via_supervisor.py",
+            # "From": "https://gist.githubusercontent.com/cboettcher/3f1101a1d1b67e7944d17c02ecd69930/raw/1d90bf38199d8c0adf47a79c8840c3e3ddf57462/run_task_via_supervisor.py",
+            "Data": LAUNCH_SCRIPT_CONTENT_STR,
         }
         # start filling the actual job description
-        job_descr_dict["Name"] =
+        job_descr_dict["Name"] = self.get_job_name(key)
         job_descr_dict["Executable"] = (
-            "python"  # TODO may require module load to be setup for some systems
+            f". airflow_config.env && . {python_env} && python run_task_via_supervisor.py --json-string '{workload.model_dump_json()}'"  # TODO may require module load to be setup for some systems
         )
-        job_descr_dict["Arguments"] = [
-
-
-
+        # job_descr_dict["Arguments"] = [
+        #     "-c",
+        #     "source airflow_config.env",
+        #     "source {python_env}/bin/activate",
+        #     "python",
+        #     "run_task_via_supervisor.py",
+        #     f"--json-string '{workload.model_dump_json()}'",
+        # ]
+
         job_descr_dict["Environment"] = {
             "AIRFLOW__CORE__EXECUTION_API_SERVER_URL": server,
-            "AIRFLOW__CORE__DAGS_FOLDER": "./",
+            # "AIRFLOW__CORE__DAGS_FOLDER": "./",
             "AIRFLOW__LOGGING__LOGGING_LEVEL": "DEBUG",
             "AIRFLOW__CORE__EXECUTOR": "LocalExecutor,airflow_unicore_integration.executors.unicore_executor.UnicoreExecutor",
+            "AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_STORAGE_PATH": f"{tmp_dir}/{workload.ti.id}/dagbundle",
         }

         # build filecontent string for importing in the job | this is needed to avoid confusing nested quotes and trying to escape them properly when using unicore env vars directly
-        env_file_content: list[str] = [
-
-
+        env_file_content: list[str] = []
+
+        # transmit needed dag bundle information (and possibly files) to job directory
+        bundle_str = conf.get("dag.processor", "dag_bundle_config_list")
+        logger.debug(f"Dag Bundle config is: {bundle_str}")
+        bundle_dict = json.loads(bundle_str)
+        conn_id_to_transmit = None
+        bundle_type = None

-
-
-
-
+        for bundle in bundle_dict:
+            if bundle["name"] == workload.bundle_info.name:
+                if bundle["classpath"] == NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH:
+                    bundle_type = NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH
+                    env_file_content.append(
+                        f"export AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST='[{json.dumps(bundle)}]'"
+                    )
+                    conn_id_to_transmit = bundle["kwargs"]["git_conn_id"]
+                break
+            # TODO handle other bundle types
+
+        if bundle_type:
+            if (
+                bundle_type == NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH
+                and conn_id_to_transmit
+            ):
+                git_hook = GitHook(conn_id_to_transmit)
+                git_remote_url = git_hook.repo_url
+                git_local_url = f"{tmp_dir}/{workload.ti.id}/dagmirror"
+                # add precommand to clone repo on login node
+                git_precommand = f". {python_env} && mkdir -p {tmp_dir}/{workload.ti.id}/dagmirror && mkdir -p {tmp_dir}/{workload.ti.id}/dagbundle && git clone {git_remote_url} {git_local_url}"
+                logger.info(f"git precommand is {git_precommand}")
+                user_added_pre_commands.append(git_precommand)
+                # add connection to local clone to env of job
+                airflow_conn_string = json.dumps(
+                    {"conn_type": "git", "host": f"file://{git_local_url}"}
+                )
+                env_file_content.append(
+                    f"export AIRFLOW_CONN_{str(conn_id_to_transmit).upper()}='{airflow_conn_string}'"
+                )
+                logger.info(f"connection is '{airflow_conn_string}'")
+                # add cleanup of local git repo to job description
+                git_cleanup_command = f"rm -r {tmp_dir}/{workload.ti.id}"
+                logger.info(f"git cleanup is {git_cleanup_command}")
+                user_added_post_commands.append(git_cleanup_command)

         airflow_env_import = {"To": "airflow_config.env", "Data": env_file_content}

-
-
-        )
-
+        job_descr_dict["Imports"] = [worker_script_import, airflow_env_import]
+
+        if len(user_added_pre_commands) > 0:
+            precommand_import = {"To": "precommand.sh", "Data": user_added_pre_commands}
+            job_descr_dict["Imports"].append(precommand_import)
+            job_descr_dict["User precommand"] = "bash precommand.sh"
+        if len(user_added_post_commands) > 0:
+            postcommand_import = {"To": "postcommand.sh", "Data": user_added_post_commands}
+            job_descr_dict["Imports"].append(postcommand_import)
+            job_descr_dict["User postcommand"] = "bash postcommand.sh"
+
         job_descr_dict["RunUserPrecommandOnLoginNode"] = (
-            "
+            "true"  # precommand needs public internet access to clone dag repos
         )
-        job_descr_dict["Imports"] = [worker_script_import, airflow_env_import]
         # add user defined options to description
         if user_added_env:
             job_descr_dict["Environment"].update(user_added_env)
```
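Taken together, `create_job_description` now assembles a UNICORE job description roughly of the following shape. This is a hedged sketch with placeholder values standing in for what the code computes at runtime, not literal output:

```python
# Hedged sketch of the assembled UNICORE job description; all values are
# placeholders for what create_job_description() computes at runtime.
job_descr_dict = {
    "Name": "example_dag - example_task - manual__2025-01-01T00:00:00 - 1",
    "Executable": (
        ". airflow_config.env && . /path/to/python_env && "
        "python run_task_via_supervisor.py --json-string '<workload json>'"
    ),
    "Environment": {
        "AIRFLOW__CORE__EXECUTION_API_SERVER_URL": "<server>",
        "AIRFLOW__LOGGING__LOGGING_LEVEL": "DEBUG",
        "AIRFLOW__CORE__EXECUTOR": "LocalExecutor,airflow_unicore_integration.executors.unicore_executor.UnicoreExecutor",
        "AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_STORAGE_PATH": "/tmp/<ti-id>/dagbundle",
    },
    "Imports": [
        {"To": "run_task_via_supervisor.py", "Data": "<LAUNCH_SCRIPT_CONTENT_STR>"},
        {"To": "airflow_config.env", "Data": ["export AIRFLOW_CONN_<CONN_ID>='...'"]},
        {"To": "precommand.sh", "Data": ["<git clone precommand>"]},
        {"To": "postcommand.sh", "Data": ["rm -r /tmp/<ti-id>"]},
    ],
    "User precommand": "bash precommand.sh",
    "User postcommand": "bash postcommand.sh",
    "RunUserPrecommandOnLoginNode": "true",
}
```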
airflow_unicore_integration-0.1.10/src/airflow_unicore_integration/util/launch_script_content.py
ADDED

```diff
@@ -0,0 +1,90 @@
+LAUNCH_SCRIPT_CONTENT_STR = '''# get some debug info before anything can fail
+import sys
+print(sys.executable)
+
+from airflow.executors import workloads
+from airflow.sdk.execution_time.supervisor import supervise
+from airflow.configuration import conf
+from pydantic import TypeAdapter
+import argparse
+import structlog
+import sys
+log = structlog.get_logger(logger_name=__name__)
+
+"""
+Usage:
+
+python run_task_via_supervisor.py [--json-string <workload string> | --json-path <workload filepath>]
+
+"""
+
+
+def execute_workload_locally(workload: workloads.All):
+    if not isinstance(workload, workloads.ExecuteTask):
+        raise ValueError(f"Executor does not know how to handle {type(workload)}")
+
+    base_url = conf.get("api", "base_url", fallback="/")
+    default_execution_api_server = f"{base_url.rstrip('/')}/execution/"
+    server = conf.get("core", "execution_api_server_url", fallback=default_execution_api_server)
+    log.info(f"Connecting to server: {server}")
+
+    log.debug(f"Workload is: {workload}")
+    log.debug(f"Dag Bundle is: {workload.bundle_info}")
+
+    supervise(
+        # This is the "wrong" ti type, but it duck types the same. TODO: Create a protocol for this.
+        ti=workload.ti,  # type: ignore[arg-type]
+        dag_rel_path=workload.dag_rel_path,
+        bundle_info=workload.bundle_info,
+        token=workload.token,
+        server=server,
+        log_path=workload.log_path,
+        # Include the output of the task to stdout too, so that in process logs can be read from via
+        # unicore as job logs.
+        subprocess_logs_to_stdout=True,
+    )
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Execute a workload in a Containerised executor using the task SDK."
+    )
+
+    # Create a mutually exclusive group to ensure that only one of the flags is set
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument(
+        "--json-path",
+        help="Path to the input JSON file containing the execution workload payload.",
+        type=str,
+    )
+    group.add_argument(
+        "--json-string",
+        help="The JSON string itself containing the execution workload payload.",
+        type=str,
+    )
+    args = parser.parse_args()
+
+    decoder = TypeAdapter[workloads.All](workloads.All)
+
+    if args.json_path:
+        try:
+            with open(args.json_path) as file:
+                input_data = file.read()
+            workload = decoder.validate_json(input_data)
+        except Exception as e:  # noqa: B902
+            log.error("Failed to read file", error=str(e))
+            sys.exit(1)
+
+    elif args.json_string:
+        try:
+            workload = decoder.validate_json(args.json_string)
+        except Exception as e:
+            log.error("Failed to parse input JSON string", error=str(e))
+            sys.exit(1)
+
+    execute_workload_locally(workload)
+
+
+if __name__ == "__main__":
+    main()
+'''
```
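At run time, the job's `Imports` entry materialises this string as `run_task_via_supervisor.py` in the job directory, and the `Executable` line invokes it with the serialised workload. A hedged local sketch of the same flow (the workload JSON is a placeholder for `workload.model_dump_json()`):

```python
# Hedged local sketch of what the UNICORE job does: write the embedded script
# to disk, then run it with a serialised workload (placeholder JSON here).
import subprocess
from pathlib import Path

from airflow_unicore_integration.util.launch_script_content import LAUNCH_SCRIPT_CONTENT_STR

Path("run_task_via_supervisor.py").write_text(LAUNCH_SCRIPT_CONTENT_STR)
workload_json = '{"...": "..."}'  # placeholder for workload.model_dump_json()
subprocess.run(
    ["python", "run_task_via_supervisor.py", "--json-string", workload_json],
    check=True,
)
```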
{airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration.egg-info/SOURCES.txt

```diff
@@ -16,4 +16,5 @@ src/airflow_unicore_integration/hooks/unicore_hooks.py
 src/airflow_unicore_integration/operators/__init__.py
 src/airflow_unicore_integration/operators/unicore_operators.py
 src/airflow_unicore_integration/policies/__init__.py
-src/airflow_unicore_integration/util/job.py
+src/airflow_unicore_integration/util/job.py
+src/airflow_unicore_integration/util/launch_script_content.py
```