airflow-unicore-integration 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow_unicore_integration/util/job.py +72 -19
- airflow_unicore_integration/util/launch_script_content.py +90 -0
- {airflow_unicore_integration-0.1.8.dist-info → airflow_unicore_integration-0.1.9.dist-info}/METADATA +1 -1
- {airflow_unicore_integration-0.1.8.dist-info → airflow_unicore_integration-0.1.9.dist-info}/RECORD +8 -7
- {airflow_unicore_integration-0.1.8.dist-info → airflow_unicore_integration-0.1.9.dist-info}/WHEEL +0 -0
- {airflow_unicore_integration-0.1.8.dist-info → airflow_unicore_integration-0.1.9.dist-info}/entry_points.txt +0 -0
- {airflow_unicore_integration-0.1.8.dist-info → airflow_unicore_integration-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {airflow_unicore_integration-0.1.8.dist-info → airflow_unicore_integration-0.1.9.dist-info}/top_level.txt +0 -0
airflow_unicore_integration/util/job.py
CHANGED

@@ -1,10 +1,16 @@
-import
+import json
+import logging
 from typing import Any
 from typing import Dict
 
 from airflow.configuration import conf
 from airflow.executors.workloads import ExecuteTask
 from airflow.models.taskinstancekey import TaskInstanceKey
+from airflow.providers.git.hooks.git import GitHook
+
+from .launch_script_content import LAUNCH_SCRIPT_CONTENT_STR
+
+logger = logging.getLogger(__name__)
 
 
 class JobDescriptionGenerator:

@@ -18,6 +24,7 @@ class JobDescriptionGenerator:
     EXECUTOR_CONFIG_PARAMETERS = "Parameters"  # gets added to the unicore job description
     EXECUTOR_CONFIG_PROJECT = "Project"  # gets added to the unicore job description
     EXECUTOR_CONFIG_PRE_COMMANDS = "precommands"  # gets added to the unicore job description
+    EXECUTOR_CONFIG_POST_COMMANDS = "postcommands"  # gets added to the unicore job descirption
     EXECUTOR_CONFIG_UNICORE_CONN_KEY = (
         "unicore_connection_id"  # alternative connection id for the Unicore connection to use
     )

@@ -33,6 +40,8 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
     This class generates a naive unicore job, that expects there to be a working python env containign airflow and any other required dependencies on the executing system.
     """
 
+    GIT_DAG_BUNDLE_CLASSPATH = "airflow.providers.git.bundles.git.GitDagBundle"
+
     def create_job_description(self, workload: ExecuteTask) -> Dict[str, Any]:
         key: TaskInstanceKey = workload.ti.key
         executor_config = workload.ti.executor_config

@@ -46,6 +55,7 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
         user_added_resources: Dict[str, str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_RESOURCES, None)  # type: ignore
         user_added_pre_commands: list[str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_PRE_COMMANDS, [])  # type: ignore
         user_defined_python_env: str = workload.ti.executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_PYTHON_ENV_KEY, None)  # type: ignore
+        user_added_post_commands: list[str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_POST_COMMANDS, [])  # type: ignore
         # get local dag path from cmd and fix dag path in arguments
         dag_rel_path = str(workload.dag_rel_path)
         if dag_rel_path.startswith("DAG_FOLDER"):

@@ -56,54 +66,97 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
         server = conf.get(
             "unicore.executor", "execution_api_server_url", fallback=default_execution_api_server
         )
+        logger.debug(f"Server is {server}")
 
         # check which python virtualenv to use
         if user_defined_python_env:
             python_env = user_defined_python_env
         else:
             python_env = conf.get("unicore.executor", "DEFAULT_ENV")
+        tmp_dir = conf.get("unicore.executor", "TMP_DIR", "/tmp")
         # prepare dag file to be uploaded via unicore
         # dag_file = open("/tmp/test")
         # dag_content = dag_file.readlines()
         # dag_import = {"To": dag_rel_path, "Data": dag_content}
         worker_script_import = {
             "To": "run_task_via_supervisor.py",
-            "From": "https://gist.githubusercontent.com/cboettcher/3f1101a1d1b67e7944d17c02ecd69930/raw/1d90bf38199d8c0adf47a79c8840c3e3ddf57462/run_task_via_supervisor.py",
+            # "From": "https://gist.githubusercontent.com/cboettcher/3f1101a1d1b67e7944d17c02ecd69930/raw/1d90bf38199d8c0adf47a79c8840c3e3ddf57462/run_task_via_supervisor.py",
+            "Data": LAUNCH_SCRIPT_CONTENT_STR,
         }
         # start filling the actual job description
         job_descr_dict["Name"] = f"{key.dag_id} - {key.task_id} - {key.run_id} - {key.try_number}"
         job_descr_dict["Executable"] = (
-            "python"  # TODO may require module load to be setup for some systems
+            f". airflow_config.env && . {python_env} && python run_task_via_supervisor.py --json-string '{workload.model_dump_json()}'"  # TODO may require module load to be setup for some systems
         )
-        job_descr_dict["Arguments"] = [
-
-
-
+        # job_descr_dict["Arguments"] = [
+        #     "-c",
+        #     "source airflow_config.env",
+        #     "source {python_env}/bin/activate",
+        #     "python",
+        #     "run_task_via_supervisor.py",
+        #     f"--json-string '{workload.model_dump_json()}'",
+        # ]
+
         job_descr_dict["Environment"] = {
             "AIRFLOW__CORE__EXECUTION_API_SERVER_URL": server,
-            "AIRFLOW__CORE__DAGS_FOLDER": "./",
+            # "AIRFLOW__CORE__DAGS_FOLDER": "./",
             "AIRFLOW__LOGGING__LOGGING_LEVEL": "DEBUG",
             "AIRFLOW__CORE__EXECUTOR": "LocalExecutor,airflow_unicore_integration.executors.unicore_executor.UnicoreExecutor",
+            "AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_STORAGE_PATH": f"{tmp_dir}/{workload.ti.id}/dagbundle",
         }
 
         # build filecontent string for importing in the job | this is needed to avoid confusing nested quotes and trying to escape them properly when using unicore env vars directly
-        env_file_content: list[str] = [
-            f"export AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST='{os.environ.get("AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST", "")}'"
-        ]
+        env_file_content: list[str] = []
 
-        #
-
-
-
+        # transmit needed dag bundle information (and possibly files) to job directory
+        bundle_str = conf.get("dag.processor", "dag_bundle_config_list")
+        logger.debug(f"Dag Bundle config is: {bundle_str}")
+        bundle_dict = json.loads(bundle_str)
+        conn_id_to_transmit = None
+        bundle_type = None
+
+        for bundle in bundle_dict:
+            if bundle["name"] == workload.bundle_info.name:
+                if bundle["classpath"] == NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH:
+                    bundle_type = NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH
+                    env_file_content.append(
+                        f"export AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST='[{json.dumps(bundle)}]'"
+                    )
+                    conn_id_to_transmit = bundle["kwargs"]["git_conn_id"]
+                break
+            # TODO handle other bundle types
+
+        if bundle_type:
+            if (
+                bundle_type == NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH
+                and conn_id_to_transmit
+            ):
+                git_hook = GitHook(conn_id_to_transmit)
+                git_remote_url = git_hook.repo_url
+                git_local_url = f"{tmp_dir}/{workload.ti.id}/dagmirror"
+                # add precommand to clone repo on ligon node
+                git_precommand = f". {python_env} && mkdir -p {tmp_dir}/{workload.ti.id}/dagmirror && mkdir -p {tmp_dir}/{workload.ti.id}/dagbundle && git clone {git_remote_url} {git_local_url}"
+                logger.info(f"git precommand is {git_precommand}")
+                user_added_pre_commands.append(git_precommand)
+                # add connection to local clone to env of job
+                airflow_conn_string = json.dumps(
+                    {"conn_type": "git", "host": f"file://{git_local_url}"}
+                )
+                env_file_content.append(
+                    f"export AIRFLOW_CONN_{str(conn_id_to_transmit).upper()}='{airflow_conn_string}'"
+                )
+                logger.info(f"connection is '{airflow_conn_string}'")
+                # add cleanup of local git repo to job description
+                git_cleanup_command = f"rm -r {tmp_dir}/{workload.ti.id}"
+                logger.info(f"git cleanup is {git_cleanup_command}")
+                user_added_post_commands.append(git_cleanup_command)
 
         airflow_env_import = {"To": "airflow_config.env", "Data": env_file_content}
 
-
-            f"source airflow_config.env && source {python_env}/bin/activate"
-        )
+        job_descr_dict["User postcommand"] = ";".join(user_added_post_commands)
         job_descr_dict["User precommand"] = ";".join(user_added_pre_commands)
         job_descr_dict["RunUserPrecommandOnLoginNode"] = (
-            "
+            "true"  # precommand needs public internet access to clone dag repos
        )
         job_descr_dict["Imports"] = [worker_script_import, airflow_env_import]
         # add user defined options to description
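For context, the new bundle-forwarding loop above only handles Git DAG bundles: it finds the configured bundle whose "name" matches workload.bundle_info.name, re-exports that single entry as AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST, and rewrites the bundle's Git connection to point at the mirror cloned on the login node. The following is a minimal sketch of the configuration shape this code expects and the env-file lines it would emit; the bundle name "dags-repo", the connection id "git_dags", and the /tmp/<task-instance-id> path are illustrative assumptions, not values from the package.

import json

# Hypothetical entry from [dag_processor] dag_bundle_config_list; the loop reads
# bundle["name"], bundle["classpath"], and bundle["kwargs"]["git_conn_id"].
bundle = {
    "name": "dags-repo",
    "classpath": "airflow.providers.git.bundles.git.GitDagBundle",
    "kwargs": {"git_conn_id": "git_dags"},
}

# Connection rewritten to the local clone under <TMP_DIR>/<task-instance-id>/dagmirror.
local_conn = {"conn_type": "git", "host": "file:///tmp/<task-instance-id>/dagmirror"}

# Lines the generator would append to airflow_config.env, which the job sources
# before starting run_task_via_supervisor.py.
env_file_content = [
    f"export AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST='[{json.dumps(bundle)}]'",
    f"export AIRFLOW_CONN_GIT_DAGS='{json.dumps(local_conn)}'",
]
print("\n".join(env_file_content))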
airflow_unicore_integration/util/launch_script_content.py
ADDED

@@ -0,0 +1,90 @@
+LAUNCH_SCRIPT_CONTENT_STR = '''# get some debug info if before anything can fail
+import sys
+print(sys.executable)
+
+from airflow.executors import workloads
+from airflow.sdk.execution_time.supervisor import supervise
+from airflow.configuration import conf
+from pydantic import TypeAdapter
+import argparse
+import structlog
+import sys
+log = structlog.get_logger(logger_name=__name__)
+
+"""
+Usage:
+
+python run_task_via_supervisor.py [--json-string <workload string> | --json-file <workload filepath>]
+
+"""
+
+
+def execute_workload_locally(workload: workloads.All):
+    if not isinstance(workload, workloads.ExecuteTask):
+        raise ValueError(f"Executor does not know how to handle {type(workload)}")
+
+    base_url = conf.get("api", "base_url", fallback="/")
+    default_execution_api_server = f"{base_url.rstrip('/')}/execution/"
+    server = conf.get("core", "execution_api_server_url", fallback=default_execution_api_server)
+    log.info(f"Connecting to server:{server}" )
+
+    log.debug(f"Workload is: {workload}")
+    log.debug(f"Dag Bundle is: {workload.bundle_info}")
+
+    supervise(
+        # This is the "wrong" ti type, but it duck types the same. TODO: Create a protocol for this.
+        ti=workload.ti,  # type: ignore[arg-type]
+        dag_rel_path=workload.dag_rel_path,
+        bundle_info=workload.bundle_info,
+        token=workload.token,
+        server=server,
+        log_path=workload.log_path,
+        # Include the output of the task to stdout too, so that in process logs can be read from via
+        # unicore as job logs.
+        subprocess_logs_to_stdout=True,
+    )
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Execute a workload in a Containerised executor using the task SDK."
+    )
+
+    # Create a mutually exclusive group to ensure that only one of the flags is set
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument(
+        "--json-path",
+        help="Path to the input JSON file containing the execution workload payload.",
+        type=str,
+    )
+    group.add_argument(
+        "--json-string",
+        help="The JSON string itself containing the execution workload payload.",
+        type=str,
+    )
+    args = parser.parse_args()
+
+    decoder = TypeAdapter[workloads.All](workloads.All)
+
+    if args.json_path:
+        try:
+            with open(args.json_path) as file:
+                input_data = file.read()
+            workload = decoder.validate_json(input_data)
+        except Exception as e:  # noqa: B902
+            log.error("Failed to read file", error=str(e))
+            sys.exit(1)
+
+    elif args.json_string:
+        try:
+            workload = decoder.validate_json(args.json_string)
+        except Exception as e:
+            log.error("Failed to parse input JSON string", error=str(e))
+            sys.exit(1)
+
+    execute_workload_locally(workload)
+
+
+if __name__ == "__main__":
+    main()
+'''
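The generator now ships this script verbatim through the "Data" import and starts it from the job's Executable string instead of downloading it from the gist. A rough local sketch of that invocation follows; the workload.json file name and the use of subprocess here are illustrative assumptions, not part of the package.

import subprocess

# Hypothetical: an ExecuteTask payload serialized with workload.model_dump_json()
# and written to workload.json by some other process.
with open("workload.json") as f:
    workload_json = f.read()

# Mirrors the Executable built in job.py:
#   . airflow_config.env && . <python_env> && python run_task_via_supervisor.py --json-string '<payload>'
subprocess.run(
    ["python", "run_task_via_supervisor.py", "--json-string", workload_json],
    check=True,
)

# The script equally accepts a file path via --json-path instead of --json-string.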
{airflow_unicore_integration-0.1.8.dist-info → airflow_unicore_integration-0.1.9.dist-info}/RECORD
RENAMED
@@ -7,10 +7,11 @@ airflow_unicore_integration/hooks/unicore_hooks.py,sha256=JjcjogWtN1xveagpkraQuY
 airflow_unicore_integration/operators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 airflow_unicore_integration/operators/unicore_operators.py,sha256=xYX1t_QzjpqHZDoEu6jJNNXVmBEnjIlApFWvgYoYoB0,16790
 airflow_unicore_integration/policies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-airflow_unicore_integration/util/job.py,sha256=
-airflow_unicore_integration
-airflow_unicore_integration-0.1.
-airflow_unicore_integration-0.1.
-airflow_unicore_integration-0.1.
-airflow_unicore_integration-0.1.
-airflow_unicore_integration-0.1.
+airflow_unicore_integration/util/job.py,sha256=Te3HNieipFQ3KrCzEf17wvSN5FnBgelW9Q3jMyr6Irg,9590
+airflow_unicore_integration/util/launch_script_content.py,sha256=42_aFpaCMmvFmmUxQDGcudkleX1YSK_yYWE8T41NOy0,2915
+airflow_unicore_integration-0.1.9.dist-info/licenses/LICENSE,sha256=hZ5ouAedeNr8ClHrQE-RLsgMsARcmv3kSZz7tE2BTJE,1526
+airflow_unicore_integration-0.1.9.dist-info/METADATA,sha256=b-AYw8ivzM6mpVLuMDJpl5oHsKFYaIXs3_vtb6cniWM,11188
+airflow_unicore_integration-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+airflow_unicore_integration-0.1.9.dist-info/entry_points.txt,sha256=PzEfCLYLSawjiYR-HNBzw8-YGfJxs1nPBULevgBQjoY,147
+airflow_unicore_integration-0.1.9.dist-info/top_level.txt,sha256=j45X-uIuOk3oL78iwlpHakMWtUkg__B7zUlJLwmZx6w,28
+airflow_unicore_integration-0.1.9.dist-info/RECORD,,
{airflow_unicore_integration-0.1.8.dist-info → airflow_unicore_integration-0.1.9.dist-info}/WHEEL
RENAMED
File without changes

{airflow_unicore_integration-0.1.8.dist-info → airflow_unicore_integration-0.1.9.dist-info}/entry_points.txt
RENAMED
File without changes

{airflow_unicore_integration-0.1.8.dist-info → airflow_unicore_integration-0.1.9.dist-info}/licenses/LICENSE
RENAMED
File without changes

{airflow_unicore_integration-0.1.8.dist-info → airflow_unicore_integration-0.1.9.dist-info}/top_level.txt
RENAMED
File without changes