airflow-unicore-integration 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

--- airflow_unicore_integration/executors/unicore_executor.py
+++ airflow_unicore_integration/executors/unicore_executor.py
@@ -20,7 +20,7 @@ from airflow.executors.workloads import ExecuteTask
 from airflow.models.taskinstancekey import TaskInstanceKey
 from airflow.utils.state import TaskInstanceState
 from pyunicore import client
-from pyunicore import credentials
+from pyunicore.credentials import create_credential

 from ..util.job import JobDescriptionGenerator
 from ..util.job import NaiveJobDescriptionGenerator
@@ -78,12 +78,11 @@ class UnicoreExecutor(BaseExecutor):
 overwrite_unicore_credential = executor_config.get( # type: ignore
 UnicoreExecutor.EXECUTOR_CONFIG_UNICORE_CREDENTIAL_KEY, None
 ) # task can provide a different credential to use, else default from connection is used
-user = conf.get("unicore.executor", "DEFAULT_USER", fallback="demouser")
-password = conf.get("unicore.executor", "DEFAULT_PASS", fallback="test123")
+token = conf.get("unicore.executor", "AUTH_TOKEN", fallback="")
 base_url = conf.get(
 "unicore.executor", "DEFAULT_URL", fallback="http://localhost:8080/DEMO-SITE/rest/core"
 )
-credential = credentials.UsernamePassword(user, password)
+credential = create_credential(token=token)
 if overwrite_unicore_site is not None:
 base_url = overwrite_unicore_site
 if overwrite_unicore_credential is not None:
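
Editorial note, not part of the diff: 0.1.9 drops the DEFAULT_USER/DEFAULT_PASS options in favour of a single AUTH_TOKEN read from the [unicore.executor] config section and passed through pyunicore's create_credential helper. A minimal sketch of the new default-credential path, assuming AUTH_TOKEN is set in airflow.cfg; the final Client call is an assumption about how the executor typically connects and is not shown in this hunk.

# Sketch only, not code from the package: mirrors the token-based credential path.
from airflow.configuration import conf
from pyunicore import client
from pyunicore.credentials import create_credential

token = conf.get("unicore.executor", "AUTH_TOKEN", fallback="")
base_url = conf.get(
    "unicore.executor", "DEFAULT_URL", fallback="http://localhost:8080/DEMO-SITE/rest/core"
)
credential = create_credential(token=token)        # bearer token instead of UsernamePassword
site_client = client.Client(credential, base_url)  # connect to the UNICORE site's REST API

Per-task credential overrides supplied through executor_config (EXECUTOR_CONFIG_UNICORE_CREDENTIAL_KEY) still take precedence, as the unchanged lines in the hunk show.
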
--- airflow_unicore_integration/util/job.py
+++ airflow_unicore_integration/util/job.py
@@ -1,10 +1,16 @@
-import os
+import json
+import logging
 from typing import Any
 from typing import Dict

 from airflow.configuration import conf
 from airflow.executors.workloads import ExecuteTask
 from airflow.models.taskinstancekey import TaskInstanceKey
+from airflow.providers.git.hooks.git import GitHook
+
+from .launch_script_content import LAUNCH_SCRIPT_CONTENT_STR
+
+logger = logging.getLogger(__name__)


 class JobDescriptionGenerator:
@@ -18,6 +24,7 @@ class JobDescriptionGenerator:
 EXECUTOR_CONFIG_PARAMETERS = "Parameters" # gets added to the unicore job description
 EXECUTOR_CONFIG_PROJECT = "Project" # gets added to the unicore job description
 EXECUTOR_CONFIG_PRE_COMMANDS = "precommands" # gets added to the unicore job description
+EXECUTOR_CONFIG_POST_COMMANDS = "postcommands" # gets added to the unicore job descirption
 EXECUTOR_CONFIG_UNICORE_CONN_KEY = (
 "unicore_connection_id" # alternative connection id for the Unicore connection to use
 )
@@ -33,6 +40,8 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
 This class generates a naive unicore job, that expects there to be a working python env containign airflow and any other required dependencies on the executing system.
 """

+GIT_DAG_BUNDLE_CLASSPATH = "airflow.providers.git.bundles.git.GitDagBundle"
+
 def create_job_description(self, workload: ExecuteTask) -> Dict[str, Any]:
 key: TaskInstanceKey = workload.ti.key
 executor_config = workload.ti.executor_config
@@ -46,6 +55,7 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
 user_added_resources: Dict[str, str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_RESOURCES, None) # type: ignore
 user_added_pre_commands: list[str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_PRE_COMMANDS, []) # type: ignore
 user_defined_python_env: str = workload.ti.executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_PYTHON_ENV_KEY, None) # type: ignore
+user_added_post_commands: list[str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_POST_COMMANDS, []) # type: ignore
 # get local dag path from cmd and fix dag path in arguments
 dag_rel_path = str(workload.dag_rel_path)
 if dag_rel_path.startswith("DAG_FOLDER"):
@@ -56,54 +66,97 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
 server = conf.get(
 "unicore.executor", "execution_api_server_url", fallback=default_execution_api_server
 )
+logger.debug(f"Server is {server}")

 # check which python virtualenv to use
 if user_defined_python_env:
 python_env = user_defined_python_env
 else:
 python_env = conf.get("unicore.executor", "DEFAULT_ENV")
+tmp_dir = conf.get("unicore.executor", "TMP_DIR", "/tmp")
 # prepare dag file to be uploaded via unicore
 # dag_file = open("/tmp/test")
 # dag_content = dag_file.readlines()
 # dag_import = {"To": dag_rel_path, "Data": dag_content}
 worker_script_import = {
 "To": "run_task_via_supervisor.py",
-"From": "https://gist.githubusercontent.com/cboettcher/3f1101a1d1b67e7944d17c02ecd69930/raw/1d90bf38199d8c0adf47a79c8840c3e3ddf57462/run_task_via_supervisor.py",
+# "From": "https://gist.githubusercontent.com/cboettcher/3f1101a1d1b67e7944d17c02ecd69930/raw/1d90bf38199d8c0adf47a79c8840c3e3ddf57462/run_task_via_supervisor.py",
+"Data": LAUNCH_SCRIPT_CONTENT_STR,
 }
 # start filling the actual job description
 job_descr_dict["Name"] = f"{key.dag_id} - {key.task_id} - {key.run_id} - {key.try_number}"
 job_descr_dict["Executable"] = (
-"python" # TODO may require module load to be setup for some systems
+f". airflow_config.env && . {python_env} && python run_task_via_supervisor.py --json-string '{workload.model_dump_json()}'" # TODO may require module load to be setup for some systems
 )
-job_descr_dict["Arguments"] = [
-"run_task_via_supervisor.py",
-f"--json-string '{workload.model_dump_json()}'",
-]
+# job_descr_dict["Arguments"] = [
+# "-c",
+# "source airflow_config.env",
+# "source {python_env}/bin/activate",
+# "python",
+# "run_task_via_supervisor.py",
+# f"--json-string '{workload.model_dump_json()}'",
+# ]
+
 job_descr_dict["Environment"] = {
 "AIRFLOW__CORE__EXECUTION_API_SERVER_URL": server,
-"AIRFLOW__CORE__DAGS_FOLDER": "./",
+# "AIRFLOW__CORE__DAGS_FOLDER": "./",
 "AIRFLOW__LOGGING__LOGGING_LEVEL": "DEBUG",
 "AIRFLOW__CORE__EXECUTOR": "LocalExecutor,airflow_unicore_integration.executors.unicore_executor.UnicoreExecutor",
+"AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_STORAGE_PATH": f"{tmp_dir}/{workload.ti.id}/dagbundle",
 }

 # build filecontent string for importing in the job | this is needed to avoid confusing nested quotes and trying to escape them properly when using unicore env vars directly
-env_file_content: list[str] = [
-f"export AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST='{os.environ.get("AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST", "")}'"
-]
+env_file_content: list[str] = []

-# insert connection details that are provided via env vars to get bundles
-for env_key in os.environ.keys():
-if env_key.startswith("AIRFLOW_CONN_"):
-env_file_content.append(f"export {env_key}='{os.environ[env_key]}'")
+# transmit needed dag bundle information (and possibly files) to job directory
+bundle_str = conf.get("dag.processor", "dag_bundle_config_list")
+logger.debug(f"Dag Bundle config is: {bundle_str}")
+bundle_dict = json.loads(bundle_str)
+conn_id_to_transmit = None
+bundle_type = None
+
+for bundle in bundle_dict:
+if bundle["name"] == workload.bundle_info.name:
+if bundle["classpath"] == NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH:
+bundle_type = NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH
+env_file_content.append(
+f"export AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST='[{json.dumps(bundle)}]'"
+)
+conn_id_to_transmit = bundle["kwargs"]["git_conn_id"]
+break
+# TODO handle other bundle types
+
+if bundle_type:
+if (
+bundle_type == NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH
+and conn_id_to_transmit
+):
+git_hook = GitHook(conn_id_to_transmit)
+git_remote_url = git_hook.repo_url
+git_local_url = f"{tmp_dir}/{workload.ti.id}/dagmirror"
+# add precommand to clone repo on ligon node
+git_precommand = f". {python_env} && mkdir -p {tmp_dir}/{workload.ti.id}/dagmirror && mkdir -p {tmp_dir}/{workload.ti.id}/dagbundle && git clone {git_remote_url} {git_local_url}"
+logger.info(f"git precommand is {git_precommand}")
+user_added_pre_commands.append(git_precommand)
+# add connection to local clone to env of job
+airflow_conn_string = json.dumps(
+{"conn_type": "git", "host": f"file://{git_local_url}"}
+)
+env_file_content.append(
+f"export AIRFLOW_CONN_{str(conn_id_to_transmit).upper()}='{airflow_conn_string}'"
+)
+logger.info(f"connection is '{airflow_conn_string}'")
+# add cleanup of local git repo to job description
+git_cleanup_command = f"rm -r {tmp_dir}/{workload.ti.id}"
+logger.info(f"git cleanup is {git_cleanup_command}")
+user_added_post_commands.append(git_cleanup_command)

 airflow_env_import = {"To": "airflow_config.env", "Data": env_file_content}

-user_added_pre_commands.append(
-f"source airflow_config.env && source {python_env}/bin/activate"
-)
+job_descr_dict["User postcommand"] = ";".join(user_added_post_commands)
 job_descr_dict["User precommand"] = ";".join(user_added_pre_commands)
 job_descr_dict["RunUserPrecommandOnLoginNode"] = (
-"false" # precommand includes activating the python env, this should be done on compute node right before running the job
+"true" # precommand needs public internet access to clone dag repos
 )
 job_descr_dict["Imports"] = [worker_script_import, airflow_env_import]
 # add user defined options to description
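
Editorial note, not part of the diff: with 0.1.9 the generator gains a "postcommands" executor_config key that is joined into the UNICORE "User postcommand" entry, mirroring the existing precommand handling, and precommands now run on the login node so the git clone of the DAG bundle has network access. A hedged sketch of how a DAG task might use the pre-/postcommand keys; the DAG id, callable and shell commands are invented for illustration, and the import paths assume Airflow 3 with the standard provider installed.

# Sketch only, not from the package: a task passing executor_config keys defined in util/job.py.
from datetime import datetime

from airflow import DAG
from airflow.providers.standard.operators.python import PythonOperator

with DAG(dag_id="unicore_postcommand_demo", start_date=datetime(2025, 1, 1), schedule=None):
    cleanup_demo = PythonOperator(
        task_id="cleanup_demo",
        python_callable=lambda: print("hello from the compute site"),
        executor_config={
            "Project": "demo-project",                  # copied into the UNICORE job description
            "precommands": ["module load git"],         # joined into "User precommand"
            "postcommands": ["rm -rf scratch_output"],  # new in 0.1.9, joined into "User postcommand"
        },
    )

Because RunUserPrecommandOnLoginNode is now "true", anything placed in precommands runs on the login node rather than on the allocated compute node.
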

--- /dev/null
+++ airflow_unicore_integration/util/launch_script_content.py
@@ -0,0 +1,90 @@
+LAUNCH_SCRIPT_CONTENT_STR = '''# get some debug info if before anything can fail
+import sys
+print(sys.executable)
+
+from airflow.executors import workloads
+from airflow.sdk.execution_time.supervisor import supervise
+from airflow.configuration import conf
+from pydantic import TypeAdapter
+import argparse
+import structlog
+import sys
+log = structlog.get_logger(logger_name=__name__)
+
+"""
+Usage:
+
+python run_task_via_supervisor.py [--json-string <workload string> | --json-file <workload filepath>]
+
+"""
+
+
+def execute_workload_locally(workload: workloads.All):
+if not isinstance(workload, workloads.ExecuteTask):
+raise ValueError(f"Executor does not know how to handle {type(workload)}")
+
+base_url = conf.get("api", "base_url", fallback="/")
+default_execution_api_server = f"{base_url.rstrip('/')}/execution/"
+server = conf.get("core", "execution_api_server_url", fallback=default_execution_api_server)
+log.info(f"Connecting to server:{server}" )
+
+log.debug(f"Workload is: {workload}")
+log.debug(f"Dag Bundle is: {workload.bundle_info}")
+
+supervise(
+# This is the "wrong" ti type, but it duck types the same. TODO: Create a protocol for this.
+ti=workload.ti, # type: ignore[arg-type]
+dag_rel_path=workload.dag_rel_path,
+bundle_info=workload.bundle_info,
+token=workload.token,
+server=server,
+log_path=workload.log_path,
+# Include the output of the task to stdout too, so that in process logs can be read from via
+# unicore as job logs.
+subprocess_logs_to_stdout=True,
+)
+
+
+def main():
+parser = argparse.ArgumentParser(
+description="Execute a workload in a Containerised executor using the task SDK."
+)
+
+# Create a mutually exclusive group to ensure that only one of the flags is set
+group = parser.add_mutually_exclusive_group(required=True)
+group.add_argument(
+"--json-path",
+help="Path to the input JSON file containing the execution workload payload.",
+type=str,
+)
+group.add_argument(
+"--json-string",
+help="The JSON string itself containing the execution workload payload.",
+type=str,
+)
+args = parser.parse_args()
+
+decoder = TypeAdapter[workloads.All](workloads.All)
+
+if args.json_path:
+try:
+with open(args.json_path) as file:
+input_data = file.read()
+workload = decoder.validate_json(input_data)
+except Exception as e: # noqa: B902
+log.error("Failed to read file", error=str(e))
+sys.exit(1)
+
+elif args.json_string:
+try:
+workload = decoder.validate_json(args.json_string)
+except Exception as e:
+log.error("Failed to parse input JSON string", error=str(e))
+sys.exit(1)
+
+execute_workload_locally(workload)
+
+
+if __name__ == "__main__":
+main()
+'''
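
For orientation, not part of the diff: the new launch_script_content module embeds the formerly gist-hosted run_task_via_supervisor.py as a string, which the job description now uploads through a UNICORE "Data" import instead of fetching it with "From". A minimal sketch of what that import amounts to in the job directory; the target filename matches the "To" entry in the hunk above.

# Sketch only: roughly what the UNICORE Imports entry does with the embedded script,
# i.e. materialise it as run_task_via_supervisor.py before the Executable line runs it.
from airflow_unicore_integration.util.launch_script_content import LAUNCH_SCRIPT_CONTENT_STR

with open("run_task_via_supervisor.py", "w") as script_file:
    script_file.write(LAUNCH_SCRIPT_CONTENT_STR)
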

--- airflow_unicore_integration-0.1.7.dist-info/METADATA
+++ airflow_unicore_integration-0.1.9.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: airflow-unicore-integration
-Version: 0.1.7
+Version: 0.1.9
 Summary: Running Unicore Jobs from airflow DAGs.
 Author-email: Christian Böttcher <c.boettcher@fz-juelich.de>
 License-Expression: BSD-3-Clause

--- airflow_unicore_integration-0.1.7.dist-info/RECORD
+++ airflow_unicore_integration-0.1.9.dist-info/RECORD
@@ -1,16 +1,17 @@
 airflow_unicore_integration/__init__.py,sha256=Qy1mlyxe2Y-PPSn0LgIW0sT6BxFuwW4_LGsPBf-Wm4s,549
 airflow_unicore_integration/executors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 airflow_unicore_integration/executors/run_task_via_supervisor.py,sha256=3ErgPf-Oy3B4Di5yNXhhPkaojIJykvCxMZ9MlKSYPI8,2756
-airflow_unicore_integration/executors/unicore_executor.py,sha256=QNfF5sW4fRou89Hjo7j7SbfsXBo4fW3F4hkgbThrY6c,6008
+airflow_unicore_integration/executors/unicore_executor.py,sha256=E1nOskWSBmC-ReLRvA8E3bY-G0lpxP403tazlBNhgFQ,5919
 airflow_unicore_integration/hooks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 airflow_unicore_integration/hooks/unicore_hooks.py,sha256=JjcjogWtN1xveagpkraQuYOdXjkp2lSnEdQc0waqhU4,1662
 airflow_unicore_integration/operators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 airflow_unicore_integration/operators/unicore_operators.py,sha256=xYX1t_QzjpqHZDoEu6jJNNXVmBEnjIlApFWvgYoYoB0,16790
 airflow_unicore_integration/policies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-airflow_unicore_integration/util/job.py,sha256=BSP-XuWL5-TRGACX__MavIcmPAA52gYLdwWL236cVz8,6703
-airflow_unicore_integration-0.1.7.dist-info/licenses/LICENSE,sha256=hZ5ouAedeNr8ClHrQE-RLsgMsARcmv3kSZz7tE2BTJE,1526
-airflow_unicore_integration-0.1.7.dist-info/METADATA,sha256=abUzAW9IfF_GXeiC1DdtUxxKxjAIfj9vJ8WzfBjQPu8,11188
-airflow_unicore_integration-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-airflow_unicore_integration-0.1.7.dist-info/entry_points.txt,sha256=PzEfCLYLSawjiYR-HNBzw8-YGfJxs1nPBULevgBQjoY,147
-airflow_unicore_integration-0.1.7.dist-info/top_level.txt,sha256=j45X-uIuOk3oL78iwlpHakMWtUkg__B7zUlJLwmZx6w,28
-airflow_unicore_integration-0.1.7.dist-info/RECORD,,
+airflow_unicore_integration/util/job.py,sha256=Te3HNieipFQ3KrCzEf17wvSN5FnBgelW9Q3jMyr6Irg,9590
+airflow_unicore_integration/util/launch_script_content.py,sha256=42_aFpaCMmvFmmUxQDGcudkleX1YSK_yYWE8T41NOy0,2915
+airflow_unicore_integration-0.1.9.dist-info/licenses/LICENSE,sha256=hZ5ouAedeNr8ClHrQE-RLsgMsARcmv3kSZz7tE2BTJE,1526
+airflow_unicore_integration-0.1.9.dist-info/METADATA,sha256=b-AYw8ivzM6mpVLuMDJpl5oHsKFYaIXs3_vtb6cniWM,11188
+airflow_unicore_integration-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+airflow_unicore_integration-0.1.9.dist-info/entry_points.txt,sha256=PzEfCLYLSawjiYR-HNBzw8-YGfJxs1nPBULevgBQjoY,147
+airflow_unicore_integration-0.1.9.dist-info/top_level.txt,sha256=j45X-uIuOk3oL78iwlpHakMWtUkg__B7zUlJLwmZx6w,28
+airflow_unicore_integration-0.1.9.dist-info/RECORD,,