airflow-unicore-integration 0.1.8__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions released to one of the supported registries, and reflects the changes between those versions as they appear in their public registry. It is provided for informational purposes only.
Files changed (22)
  1. {airflow_unicore_integration-0.1.8/src/airflow_unicore_integration.egg-info → airflow_unicore_integration-0.1.10}/PKG-INFO +1 -1
  2. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/pyproject.toml +1 -1
  3. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/util/job.py +87 -23
  4. airflow_unicore_integration-0.1.10/src/airflow_unicore_integration/util/launch_script_content.py +90 -0
  5. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10/src/airflow_unicore_integration.egg-info}/PKG-INFO +1 -1
  6. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration.egg-info/SOURCES.txt +2 -1
  7. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/LICENSE +0 -0
  8. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/README.rst +0 -0
  9. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/setup.cfg +0 -0
  10. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/__init__.py +0 -0
  11. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/executors/__init__.py +0 -0
  12. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/executors/run_task_via_supervisor.py +0 -0
  13. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/executors/unicore_executor.py +0 -0
  14. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/hooks/__init__.py +0 -0
  15. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/hooks/unicore_hooks.py +0 -0
  16. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/operators/__init__.py +0 -0
  17. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/operators/unicore_operators.py +0 -0
  18. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration/policies/__init__.py +0 -0
  19. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration.egg-info/dependency_links.txt +0 -0
  20. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration.egg-info/entry_points.txt +0 -0
  21. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration.egg-info/requires.txt +0 -0
  22. {airflow_unicore_integration-0.1.8 → airflow_unicore_integration-0.1.10}/src/airflow_unicore_integration.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: airflow-unicore-integration
- Version: 0.1.8
+ Version: 0.1.10
  Summary: Running Unicore Jobs from airflow DAGs.
  Author-email: Christian Böttcher <c.boettcher@fz-juelich.de>
  License-Expression: BSD-3-Clause
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "airflow-unicore-integration"
- version = "0.1.8"
+ version = "0.1.10"
  authors = [
  { name="Christian Böttcher", email="c.boettcher@fz-juelich.de" },
  ]
@@ -1,10 +1,16 @@
- import os
+ import json
+ import logging
  from typing import Any
  from typing import Dict
 
  from airflow.configuration import conf
  from airflow.executors.workloads import ExecuteTask
  from airflow.models.taskinstancekey import TaskInstanceKey
+ from airflow.providers.git.hooks.git import GitHook
+
+ from .launch_script_content import LAUNCH_SCRIPT_CONTENT_STR
+
+ logger = logging.getLogger(__name__)
 
 
  class JobDescriptionGenerator:
@@ -18,6 +24,7 @@ class JobDescriptionGenerator:
  EXECUTOR_CONFIG_PARAMETERS = "Parameters" # gets added to the unicore job description
  EXECUTOR_CONFIG_PROJECT = "Project" # gets added to the unicore job description
  EXECUTOR_CONFIG_PRE_COMMANDS = "precommands" # gets added to the unicore job description
+ EXECUTOR_CONFIG_POST_COMMANDS = "postcommands" # gets added to the unicore job descirption
  EXECUTOR_CONFIG_UNICORE_CONN_KEY = (
  "unicore_connection_id" # alternative connection id for the Unicore connection to use
  )
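For illustration, these keys are what a DAG author can set in a task's executor_config; only the key strings ("Parameters", "Project", "precommands", "postcommands", "unicore_connection_id") come from the class above, while the values below are made-up examples:

executor_config = {
    "Project": "my-hpc-project",           # example value; copied into the UNICORE job description
    "precommands": ["module load git"],    # written to precommand.sh by the generator
    "postcommands": ["echo 'task done'"],  # new "postcommands" key in 0.1.10; written to postcommand.sh
}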
@@ -27,12 +34,17 @@ class JobDescriptionGenerator:
  def create_job_description(self, workload: ExecuteTask) -> Dict[str, Any]:
  raise NotImplementedError()
 
+ def get_job_name(self, key: TaskInstanceKey) -> str:
+ return f"{key.dag_id} - {key.task_id} - {key.run_id} - {key.try_number}"
+
 
  class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
  """
- This class generates a naive unicore job, that expects there to be a working python env containign airflow and any other required dependencies on the executing system.
+ This class generates a naive unicore job, that expects there to be a working python env containing airflow and any other required dependencies on the executing system.
  """
 
+ GIT_DAG_BUNDLE_CLASSPATH = "airflow.providers.git.bundles.git.GitDagBundle"
+
  def create_job_description(self, workload: ExecuteTask) -> Dict[str, Any]:
  key: TaskInstanceKey = workload.ti.key
  executor_config = workload.ti.executor_config
@@ -46,6 +58,7 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
  user_added_resources: Dict[str, str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_RESOURCES, None) # type: ignore
  user_added_pre_commands: list[str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_PRE_COMMANDS, []) # type: ignore
  user_defined_python_env: str = workload.ti.executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_PYTHON_ENV_KEY, None) # type: ignore
+ user_added_post_commands: list[str] = executor_config.get(JobDescriptionGenerator.EXECUTOR_CONFIG_POST_COMMANDS, []) # type: ignore
  # get local dag path from cmd and fix dag path in arguments
  dag_rel_path = str(workload.dag_rel_path)
  if dag_rel_path.startswith("DAG_FOLDER"):
@@ -56,56 +69,107 @@ class NaiveJobDescriptionGenerator(JobDescriptionGenerator):
  server = conf.get(
  "unicore.executor", "execution_api_server_url", fallback=default_execution_api_server
  )
+ logger.debug(f"Server is {server}")
 
  # check which python virtualenv to use
  if user_defined_python_env:
  python_env = user_defined_python_env
  else:
  python_env = conf.get("unicore.executor", "DEFAULT_ENV")
+ tmp_dir = conf.get("unicore.executor", "TMP_DIR", "/tmp")
  # prepare dag file to be uploaded via unicore
  # dag_file = open("/tmp/test")
  # dag_content = dag_file.readlines()
  # dag_import = {"To": dag_rel_path, "Data": dag_content}
  worker_script_import = {
  "To": "run_task_via_supervisor.py",
- "From": "https://gist.githubusercontent.com/cboettcher/3f1101a1d1b67e7944d17c02ecd69930/raw/1d90bf38199d8c0adf47a79c8840c3e3ddf57462/run_task_via_supervisor.py",
+ # "From": "https://gist.githubusercontent.com/cboettcher/3f1101a1d1b67e7944d17c02ecd69930/raw/1d90bf38199d8c0adf47a79c8840c3e3ddf57462/run_task_via_supervisor.py",
+ "Data": LAUNCH_SCRIPT_CONTENT_STR,
  }
  # start filling the actual job description
- job_descr_dict["Name"] = f"{key.dag_id} - {key.task_id} - {key.run_id} - {key.try_number}"
+ job_descr_dict["Name"] = self.get_job_name(key)
  job_descr_dict["Executable"] = (
- "python" # TODO may require module load to be setup for some systems
+ f". airflow_config.env && . {python_env} && python run_task_via_supervisor.py --json-string '{workload.model_dump_json()}'" # TODO may require module load to be setup for some systems
  )
- job_descr_dict["Arguments"] = [
- "run_task_via_supervisor.py",
- f"--json-string '{workload.model_dump_json()}'",
- ]
+ # job_descr_dict["Arguments"] = [
+ # "-c",
+ # "source airflow_config.env",
+ # "source {python_env}/bin/activate",
+ # "python",
+ # "run_task_via_supervisor.py",
+ # f"--json-string '{workload.model_dump_json()}'",
+ # ]
+
  job_descr_dict["Environment"] = {
  "AIRFLOW__CORE__EXECUTION_API_SERVER_URL": server,
- "AIRFLOW__CORE__DAGS_FOLDER": "./",
+ # "AIRFLOW__CORE__DAGS_FOLDER": "./",
  "AIRFLOW__LOGGING__LOGGING_LEVEL": "DEBUG",
  "AIRFLOW__CORE__EXECUTOR": "LocalExecutor,airflow_unicore_integration.executors.unicore_executor.UnicoreExecutor",
+ "AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_STORAGE_PATH": f"{tmp_dir}/{workload.ti.id}/dagbundle",
  }
 
  # build filecontent string for importing in the job | this is needed to avoid confusing nested quotes and trying to escape them properly when using unicore env vars directly
- env_file_content: list[str] = [
- f"export AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST='{os.environ.get("AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST", "")}'"
- ]
+ env_file_content: list[str] = []
+
+ # transmit needed dag bundle information (and possibly files) to job directory
+ bundle_str = conf.get("dag.processor", "dag_bundle_config_list")
+ logger.debug(f"Dag Bundle config is: {bundle_str}")
+ bundle_dict = json.loads(bundle_str)
+ conn_id_to_transmit = None
+ bundle_type = None
 
- # insert connection details that are provided via env vars to get bundles
- for env_key in os.environ.keys():
- if env_key.startswith("AIRFLOW_CONN_"):
- env_file_content.append(f"export {env_key}='{os.environ[env_key]}'")
+ for bundle in bundle_dict:
+ if bundle["name"] == workload.bundle_info.name:
+ if bundle["classpath"] == NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH:
+ bundle_type = NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH
+ env_file_content.append(
+ f"export AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST='[{json.dumps(bundle)}]'"
+ )
+ conn_id_to_transmit = bundle["kwargs"]["git_conn_id"]
+ break
+ # TODO handle other bundle types
+
+ if bundle_type:
+ if (
+ bundle_type == NaiveJobDescriptionGenerator.GIT_DAG_BUNDLE_CLASSPATH
+ and conn_id_to_transmit
+ ):
+ git_hook = GitHook(conn_id_to_transmit)
+ git_remote_url = git_hook.repo_url
+ git_local_url = f"{tmp_dir}/{workload.ti.id}/dagmirror"
+ # add precommand to clone repo on ligon node
+ git_precommand = f". {python_env} && mkdir -p {tmp_dir}/{workload.ti.id}/dagmirror && mkdir -p {tmp_dir}/{workload.ti.id}/dagbundle && git clone {git_remote_url} {git_local_url}"
+ logger.info(f"git precommand is {git_precommand}")
+ user_added_pre_commands.append(git_precommand)
+ # add connection to local clone to env of job
+ airflow_conn_string = json.dumps(
+ {"conn_type": "git", "host": f"file://{git_local_url}"}
+ )
+ env_file_content.append(
+ f"export AIRFLOW_CONN_{str(conn_id_to_transmit).upper()}='{airflow_conn_string}'"
+ )
+ logger.info(f"connection is '{airflow_conn_string}'")
+ # add cleanup of local git repo to job description
+ git_cleanup_command = f"rm -r {tmp_dir}/{workload.ti.id}"
+ logger.info(f"git cleanup is {git_cleanup_command}")
+ user_added_post_commands.append(git_cleanup_command)
 
  airflow_env_import = {"To": "airflow_config.env", "Data": env_file_content}
 
- user_added_pre_commands.append(
- f"source airflow_config.env && source {python_env}/bin/activate"
- )
- job_descr_dict["User precommand"] = ";".join(user_added_pre_commands)
+ job_descr_dict["Imports"] = [worker_script_import, airflow_env_import]
+
+ if len(user_added_pre_commands) > 0:
+ precommand_import = {"To": "precommand.sh", "Data": user_added_pre_commands}
+ job_descr_dict["Imports"].append(precommand_import)
+ job_descr_dict["User precommand"] = "bash precommand.sh"
+ if len(user_added_post_commands) > 0:
+ postcommand_import = {"To": "postcommand.sh", "Data": user_added_post_commands}
+ job_descr_dict["Imports"].append(postcommand_import)
+ job_descr_dict["User postcommand"] = "bash postcommand.sh"
+
  job_descr_dict["RunUserPrecommandOnLoginNode"] = (
- "false" # precommand includes activating the python env, this should be done on compute node right before running the job
+ "true" # precommand needs public internet access to clone dag repos
  )
- job_descr_dict["Imports"] = [worker_script_import, airflow_env_import]
  # add user defined options to description
  if user_added_env:
  job_descr_dict["Environment"].update(user_added_env)
@@ -0,0 +1,90 @@
+ LAUNCH_SCRIPT_CONTENT_STR = '''# get some debug info if before anything can fail
+ import sys
+ print(sys.executable)
+
+ from airflow.executors import workloads
+ from airflow.sdk.execution_time.supervisor import supervise
+ from airflow.configuration import conf
+ from pydantic import TypeAdapter
+ import argparse
+ import structlog
+ import sys
+ log = structlog.get_logger(logger_name=__name__)
+
+ """
+ Usage:
+
+ python run_task_via_supervisor.py [--json-string <workload string> | --json-file <workload filepath>]
+
+ """
+
+
+ def execute_workload_locally(workload: workloads.All):
+ if not isinstance(workload, workloads.ExecuteTask):
+ raise ValueError(f"Executor does not know how to handle {type(workload)}")
+
+ base_url = conf.get("api", "base_url", fallback="/")
+ default_execution_api_server = f"{base_url.rstrip('/')}/execution/"
+ server = conf.get("core", "execution_api_server_url", fallback=default_execution_api_server)
+ log.info(f"Connecting to server:{server}" )
+
+ log.debug(f"Workload is: {workload}")
+ log.debug(f"Dag Bundle is: {workload.bundle_info}")
+
+ supervise(
+ # This is the "wrong" ti type, but it duck types the same. TODO: Create a protocol for this.
+ ti=workload.ti, # type: ignore[arg-type]
+ dag_rel_path=workload.dag_rel_path,
+ bundle_info=workload.bundle_info,
+ token=workload.token,
+ server=server,
+ log_path=workload.log_path,
+ # Include the output of the task to stdout too, so that in process logs can be read from via
+ # unicore as job logs.
+ subprocess_logs_to_stdout=True,
+ )
+
+
+ def main():
+ parser = argparse.ArgumentParser(
+ description="Execute a workload in a Containerised executor using the task SDK."
+ )
+
+ # Create a mutually exclusive group to ensure that only one of the flags is set
+ group = parser.add_mutually_exclusive_group(required=True)
+ group.add_argument(
+ "--json-path",
+ help="Path to the input JSON file containing the execution workload payload.",
+ type=str,
+ )
+ group.add_argument(
+ "--json-string",
+ help="The JSON string itself containing the execution workload payload.",
+ type=str,
+ )
+ args = parser.parse_args()
+
+ decoder = TypeAdapter[workloads.All](workloads.All)
+
+ if args.json_path:
+ try:
+ with open(args.json_path) as file:
+ input_data = file.read()
+ workload = decoder.validate_json(input_data)
+ except Exception as e: # noqa: B902
+ log.error("Failed to read file", error=str(e))
+ sys.exit(1)
+
+ elif args.json_string:
+ try:
+ workload = decoder.validate_json(args.json_string)
+ except Exception as e:
+ log.error("Failed to parse input JSON string", error=str(e))
+ sys.exit(1)
+
+ execute_workload_locally(workload)
+
+
+ if __name__ == "__main__":
+ main()
+ '''
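The new launch script and job.py meet through a JSON round-trip of the ExecuteTask workload: job.py embeds workload.model_dump_json() into the job's Executable line after --json-string, and main() above decodes it with the same pydantic TypeAdapter before calling supervise(). A minimal sketch of that round-trip, using only the calls visible in this diff:

from airflow.executors import workloads
from pydantic import TypeAdapter

def roundtrip(workload: workloads.ExecuteTask) -> workloads.All:
    payload = workload.model_dump_json()                  # what job.py places after --json-string
    decoder = TypeAdapter[workloads.All](workloads.All)   # same decoder as in main() above
    return decoder.validate_json(payload)                 # reconstructs the workload on the compute side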
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: airflow-unicore-integration
- Version: 0.1.8
+ Version: 0.1.10
  Summary: Running Unicore Jobs from airflow DAGs.
  Author-email: Christian Böttcher <c.boettcher@fz-juelich.de>
  License-Expression: BSD-3-Clause
@@ -16,4 +16,5 @@ src/airflow_unicore_integration/hooks/unicore_hooks.py
  src/airflow_unicore_integration/operators/__init__.py
  src/airflow_unicore_integration/operators/unicore_operators.py
  src/airflow_unicore_integration/policies/__init__.py
- src/airflow_unicore_integration/util/job.py
+ src/airflow_unicore_integration/util/job.py
+ src/airflow_unicore_integration/util/launch_script_content.py