airflow-unicore-integration 0.0.4 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
+ def get_provider_info():
+     return {
+         "package-name": "airflow-unicore-integration",
+         "name": "Unicore",
+         "description": "Apache Airflow Unicore provider containing Operators and hooks.",
+         "connection-types": [{
+             "connection-type": "unicore",
+             "hook-class-name": "airflow_unicore_integration.hooks.unicore_hooks.UnicoreHook",
+         }],
+     }
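The "connection-types" entry above registers the "unicore" connection type and points Airflow at the hook class shown in the next file. A minimal sketch, assuming an initialized Airflow metadata database and placeholder credentials, of creating a matching connection programmatically (the web UI or the "airflow connections add" CLI command works just as well):

    from airflow.models import Connection
    from airflow.utils.db import merge_conn

    merge_conn(
        Connection(
            conn_id="uc_default",     # matches UnicoreHook.default_conn_name
            conn_type="unicore",
            host="https://unicore.example.org:8080/DEMO-SITE/rest/core",  # placeholder UNICORE/X base URL
            login="demouser",         # placeholder
            password="demopassword",  # placeholder
        )
    )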
File without changes
@@ -0,0 +1,49 @@
+ from __future__ import annotations
+
+ from pyunicore import client, credentials
+
+ from airflow.hooks.base import BaseHook
+
+
+ class UnicoreHook(BaseHook):
+     """
+     Interact with Unicore.
+
+     Creates Unicore clients from Airflow connections.
+
+     :param uc_conn_id: The Unicore connection id - default: uc_default
+     """
+
+     conn_name_attr = "uc_conn_id"
+     default_conn_name = "uc_default"
+     conn_type = "unicore"
+     hook_name = "Unicore"
+
+     def __init__(self, uc_conn_id: str = default_conn_name) -> None:
+         super().__init__()
+         self.uc_conn_id = uc_conn_id
+         self.conn = None
+
+     def get_conn(self, overwrite_base_url: str | None = None, overwrite_credential: credentials.Credential | None = None) -> client.Client:
+         """Return a Unicore client. base_url and credential may be overwritten."""
+         if self.conn is None or overwrite_base_url is not None or overwrite_credential is not None:  # if no client exists yet, or an overwrite attribute is set, create a new one
+             params = self.get_connection(self.uc_conn_id)
+             base_url = params.host
+             credential = credentials.UsernamePassword(params.login, params.password)
+             if overwrite_base_url is not None:
+                 base_url = overwrite_base_url
+             if overwrite_credential is not None:
+                 credential = overwrite_credential
+             self.conn = client.Client(credential, base_url)
+
+         return self.conn
+
+
+     def test_connection(self) -> tuple[bool, str]:
+         """Test the connection by sending an access_info request."""
+         try:
+             conn = self.get_conn()
+             conn.access_info()
+             return True, "Connection successfully tested"
+         except Exception as e:
+             return False, str(e)
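A minimal usage sketch for the hook, assuming the uc_default connection exists and the UNICORE server is reachable; the printed values are illustrative:

    from airflow_unicore_integration.hooks.unicore_hooks import UnicoreHook

    hook = UnicoreHook()                  # defaults to the "uc_default" connection
    ok, message = hook.test_connection()  # sends an access_info request
    print(ok, message)

    uc = hook.get_conn()                  # pyunicore.client.Client
    print(uc.access_info())               # authentication/role info reported by the server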
File without changes
@@ -0,0 +1,324 @@
+ from airflow.models.baseoperator import BaseOperator
+ from airflow.decorators.base import DecoratedOperator, task_decorator_factory
+ from typing import Any, List, Dict
+
+ from airflow.utils.context import Context
+
+ import pyunicore.client as uc_client
+ import pyunicore.credentials as uc_credentials
+ from airflow_unicore_integration.hooks import unicore_hooks
+
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ DEFAULT_SCRIPT_NAME = 'default_script_from_job_description'
+ DEFAULT_BSS_FILE = 'default_bss_file_upload'
+
+ class JobDescriptionException(BaseException):
+     """For errors in the unicore job description that would be generated by the unicore operators."""
+     def __init__(self, *args: object) -> None:
+         super().__init__(*args)
+
+ class UnicoreGenericOperator(BaseOperator):
+
+     def __init__(self, name: str, application_name: str | None = None, application_version: str | None = None, executable: str | None = None, arguments: List[str] | None = None,
+                  environment: List[str] | None = None, parameters: Dict[str, str | List[str]] | None = None, stdout: str | None = None, stderr: str | None = None, stdin: str | None = None, ignore_non_zero_exit_code: bool | None = None,
+                  user_pre_command: str | None = None, run_user_pre_command_on_login_node: bool | None = None, user_pre_command_ignore_non_zero_exit_code: bool | None = None, user_post_command: str | None = None,
+                  run_user_post_command_on_login_node: bool | None = None, user_post_command_ignore_non_zero_exit_code: bool | None = None, resources: Dict[str, str] | None = None, project: str | None = None,
+                  imports: List[Dict[str, str | List[str]]] | None = None, exports: List[Dict[str, str | List[str]]] | None = None, have_client_stagein: bool | None = None, job_type: str | None = None,
+                  login_node: str | None = None, bss_file: str | None = None, tags: List[str] | None = None, notification: str | None = None, user_email: str | None = None, xcom_output_files: List[str] = ["stdout", "stderr"],
+                  base_url: str | None = None, credential: uc_credentials.Credential | None = None, credential_username: str | None = None, credential_password: str | None = None, credential_token: str | None = None, **kwargs):
+         """
+         Initialize a Unicore job operator.
+         :param name: Defines both the Airflow task name and the Unicore job name.
+         :param xcom_output_files: A list of filenames to be exported from the job directory via XComs. By default stdout and stderr.
+         :param base_url: The base URL of the UNICORE/X server to be used for the Unicore client. Overwrites the configured Airflow connection.
+         :param credential: A Unicore credential to be used for the Unicore client. Overwrites the configured user/password from the Airflow connection.
+         :param credential_username: Username for the Unicore client credentials. Overwrites the configured user in the Airflow connection.
+         :param credential_password: Password for the Unicore client credentials. Overwrites the configured password in the Airflow connection.
+         :param credential_token: An OIDC token to be used by the Unicore client. Overwrites the configured credentials in the Airflow connection.
+
+         All other parameters are parameters for the Unicore job description as defined [here](https://unicore-docs.readthedocs.io/en/latest/user-docs/rest-api/job-description/index.html#overview).
+         """
+         super().__init__(**kwargs)
+         self.name = name
+         self.application_name = application_name
+         self.application_version = application_version
+         self.executable = executable
+         self.arguments = arguments
+         self.environment = environment
+         self.parameters = parameters
+         self.stdout = stdout
+         self.stderr = stderr
+         self.stdin = stdin
+         self.ignore_non_zero_exit_code = ignore_non_zero_exit_code
+         self.user_pre_command = user_pre_command
+         self.run_user_pre_command_on_login_node = run_user_pre_command_on_login_node
+         self.user_pre_command_ignore_non_zero_exit_code = user_pre_command_ignore_non_zero_exit_code
+         self.user_post_command = user_post_command
+         self.run_user_post_command_on_login_node = run_user_post_command_on_login_node
+         self.user_post_command_ignore_non_zero_exit_code = user_post_command_ignore_non_zero_exit_code
+         self.resources = resources
+         self.project = project
+         self.imports = imports
+         self.exports = exports
+         self.have_client_stagein = have_client_stagein
+         self.job_type = job_type
+         self.login_node = login_node
+         self.bss_file = bss_file
+         self.tags = tags
+         self.notification = notification
+         self.user_email = user_email
+         self.xcom_output_files = xcom_output_files
+
+         self.base_url = base_url
+         self.credential = credential
+         self.credential_username = credential_username
+         self.credential_password = credential_password
+         self.credential_token = credential_token
+
+         self.validate_job_description()
+         logger.debug("created Unicore Job Task")
+
+     def validate_job_description(self):
+         """
+         Does some basic validation and checks if a proper job description can be generated. Raises a JobDescriptionException if not.
+         """
+         # check for some errors in the parameters for creating the unicore job
+
+         # first check if an application or executable has been set
+         if self.application_name is None and self.executable is None:  # TODO check if executable is required if bss_file is given
+             raise JobDescriptionException
+
+         # if bss_file is set, we need an executable
+         if self.bss_file is not None:
+             if self.executable is None and self.application_name is not None:
+                 raise JobDescriptionException
+             # TODO validate BSS file?
+             logger.info("using bss file")
+
+         if self.credential_token is not None and self.credential is None:
+             logger.info("using provided oidc token")
+             self.credential = uc_credentials.OIDCToken(token=self.credential_token)
+
+         if self.credential_username is not None and self.credential_password is not None and self.credential is None:
+             logger.info("using provided user/pass credentials")
+             self.credential = uc_credentials.UsernamePassword(username=self.credential_username, password=self.credential_password)
+
+
+     def get_job_description(self) -> dict[str, Any]:
+         """Generates the job description to be submitted to the unicore server."""
+         job_description_dict: Dict = {}
+
+         # now add the various simple attribute fragments to the job description dict, when they are not None
+         if self.name is not None:
+             job_description_dict["Name"] = self.name
+
+         if self.application_name is not None:
+             job_description_dict["ApplicationName"] = self.application_name
+
+         if self.application_version is not None:
+             job_description_dict["ApplicationVersion"] = self.application_version
+
+         if self.executable is not None:
+             job_description_dict["Executable"] = self.executable
+
+         if self.arguments is not None:
+             job_description_dict["Arguments"] = self.arguments
+
+         if self.environment is not None:
+             job_description_dict["Environment"] = self.environment
+
+         if self.parameters is not None:
+             job_description_dict["Parameters"] = self.parameters
+
+         if self.stdout is not None:
+             job_description_dict["Stdout"] = self.stdout
+
+         if self.stderr is not None:
+             job_description_dict["Stderr"] = self.stderr
+
+         if self.stdin is not None:
+             job_description_dict["Stdin"] = self.stdin
+
+         if self.ignore_non_zero_exit_code is not None:
+             job_description_dict["IgnoreNonZeroExitCode"] = self.ignore_non_zero_exit_code
+
+         if self.user_pre_command is not None:
+             job_description_dict["User precommand"] = self.user_pre_command
+
+         if self.run_user_pre_command_on_login_node is not None:
+             job_description_dict["RunUserPrecommandOnLoginNode"] = self.run_user_pre_command_on_login_node
+
+         if self.user_pre_command_ignore_non_zero_exit_code is not None:
+             job_description_dict["UserPrecommandIgnoreNonZeroExitCode"] = self.user_pre_command_ignore_non_zero_exit_code
+
+         if self.user_post_command is not None:
+             job_description_dict["User postcommand"] = self.user_post_command
+
+         if self.run_user_post_command_on_login_node is not None:
+             job_description_dict["RunUserPostcommandOnLoginNode"] = self.run_user_post_command_on_login_node
+
+         if self.user_post_command_ignore_non_zero_exit_code is not None:
+             job_description_dict["UserPostcommandIgnoreNonZeroExitCode"] = self.user_post_command_ignore_non_zero_exit_code
+
+         if self.resources is not None:
+             job_description_dict["Resources"] = self.resources
+
+         if self.project is not None:
+             job_description_dict["Project"] = self.project
+
+         if self.imports is not None:
+             job_description_dict["Imports"] = self.imports
+
+         if self.exports is not None:
+             job_description_dict["Exports"] = self.exports
+
+         if self.have_client_stagein is not None:
+             job_description_dict["haveClientStageIn"] = self.have_client_stagein
+
+         if self.job_type is not None:
+             job_description_dict["Job type"] = self.job_type
+
+         if self.login_node is not None:
+             job_description_dict["Login node"] = self.login_node
+
+         if self.bss_file is not None:
+             job_description_dict["BSS file"] = self.bss_file
+
+         if self.notification is not None:
+             job_description_dict["Notification"] = self.notification
+
+         if self.user_email is not None:
+             job_description_dict["User email"] = self.user_email
+
+         if self.tags is not None:
+             job_description_dict["Tags"] = self.tags
+
+         return job_description_dict
+
+     def get_uc_client(self, uc_conn_id: str | None = None) -> uc_client.Client:
+         """Return a Unicore client for submitting the job. Uses the provided base_url and credential if possible, else the default Unicore connection from Airflow."""
+         if self.base_url is not None and self.credential is not None:
+             return uc_client.Client(self.credential, self.base_url)
+         if uc_conn_id is None:
+             hook = unicore_hooks.UnicoreHook()
+         else:
+             hook = unicore_hooks.UnicoreHook(uc_conn_id=uc_conn_id)
+         return hook.get_conn(overwrite_base_url=self.base_url, overwrite_credential=self.credential)
+
+     def execute_async(self, context: Context) -> Any:
+         """Submits the job and returns the job object without waiting for it to finish."""
+         client = self.get_uc_client()
+         job = client.new_job(job_description=self.get_job_description(), inputs=[])
+         return job
+
+     def execute(self, context: Context) -> Any:
+         """
+         Submits the job description to the unicore server.
+         After the job has finished (failed or successful), basic output such as logs, status messages, the job id and file content is retrieved and returned via XComs or Airflow logs.
+         The return value of this function is the exit code of the unicore job, to ensure proper handling of failed or aborted jobs.
+         """
+         import logging
+         from pyunicore.client import JobStatus, Job
+         logger = logging.getLogger(__name__)
+
+         job: Job = self.execute_async(context)  # TODO depending on params this may spawn multiple jobs -> in those cases, e.g. output needs to be handled differently
+         logger.debug(f"Waiting for unicore job {job}")
+         job.poll()  # wait for job to finish
+
+         task_instance = context['task_instance']
+
+
+         task_instance.xcom_push(key="status_message", value=job.properties["statusMessage"])
+         task_instance.xcom_push(key="log", value=job.properties["log"])
+         for line in job.properties["log"]:
+             logger.info(f"UNICORE LOGS: {line}")
+
+         if job.status is not JobStatus.SUCCESSFUL:
+             from airflow.exceptions import AirflowFailException
+             logger.error(f"Unicore job not successful. Job state is {job.status}. Aborting this task.")
+             raise AirflowFailException
+
+
+         work_dir = job.working_dir
+
+         content = work_dir.contents()['content']
+         task_instance.xcom_push(key="workdir_content", value=content)
+
+         for filename in content.keys():
+             if "/UNICORE_Job_" in filename:
+                 task_instance.xcom_push(key="Unicore Job ID", value=filename[13:])  # strip the leading "/UNICORE_Job_"
+                 job_text = work_dir.stat(filename).raw().read().decode("utf-8")
+                 task_instance.xcom_push(key="UNICORE Job", value=job_text)
+                 continue
+             if "bss_submit_" in filename:
+                 bss_submit_text = work_dir.stat(filename).raw().read().decode("utf-8")
+                 task_instance.xcom_push(key="BSS_SUBMIT", value=bss_submit_text)
+                 continue
+
+         from requests.exceptions import HTTPError
+         for file in self.xcom_output_files:
+             try:
+                 logger.debug(f"Retrieving file {file} from workdir.")
+                 remote_file = work_dir.stat(file)
+                 content = remote_file.raw().read().decode("utf-8")
+                 task_instance.xcom_push(key=file, value=content)
+             except HTTPError as http_error:
+                 logger.error(f"Error while retrieving file {file} from workdir: {http_error}")
+                 continue
+             except UnicodeDecodeError as unicode_error:
+                 logger.error(f"Error while decoding file {file}: {unicode_error}")
+
+         exit_code = job.properties["exitCode"]
+         return exit_code
+
+ class UnicoreScriptOperator(UnicoreGenericOperator):
+     """
+     This operator uploads and submits a script to the unicore server as a job. Behaviour and parameters are otherwise the same as the UnicoreGenericOperator.
+     """
+     def __init__(self, name: str, script_content: str, **kwargs):
+         """
+         :param script_content: the content of the script to be executed as a string
+         """
+         super().__init__(name=name, executable=DEFAULT_SCRIPT_NAME, **kwargs)
+         lines = script_content.split('\n')
+         script_stagein = {
+             "To": DEFAULT_SCRIPT_NAME,
+             "Data": lines
+         }
+         if self.imports is not None:
+             self.imports.append(script_stagein)
+         else:
+             self.imports = [script_stagein]
+
+ class UnicoreBSSOperator(UnicoreGenericOperator):
+     """
+     This operator uploads and submits a BSS script to the unicore server as a raw job. Behaviour and parameters are otherwise the same as the UnicoreGenericOperator.
+     """
+     def __init__(self, name: str, bss_file_content: str, **kwargs):
+         """
+         :param bss_file_content: the content of the BSS script to be executed as a string
+         """
+         super().__init__(name=name, bss_file=DEFAULT_BSS_FILE, job_type="raw", xcom_output_files=[], **kwargs)
+         lines = bss_file_content.split('\n')
+         bss_stagein = {
+             "To": DEFAULT_BSS_FILE,
+             "Data": lines
+         }
+         if self.imports is not None:
+             self.imports.append(bss_stagein)
+         else:
+             self.imports = [bss_stagein]
+
+ class UnicoreExecutableOperator(UnicoreGenericOperator):
+     """A simplified operator for just executing an executable. Still supports all parameters of the UnicoreGenericOperator."""
+     def __init__(self, name: str, executable: str, xcom_output_files: List[str] = ["stdout", "stderr"], **kwargs) -> None:
+         super().__init__(name=name, executable=executable, xcom_output_files=xcom_output_files, **kwargs)
+
+ class UnicoreDateOperator(UnicoreExecutableOperator):
+     """An operator for executing the 'date' executable. Mostly for testing purposes. Still supports all parameters of the UnicoreGenericOperator."""
+     def __init__(self, name: str, **kwargs) -> None:
+         super().__init__(name=name, executable='date', **kwargs)
+
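A minimal DAG sketch using the operators above; the dag id, schedule and script content are illustrative, and the Unicore connection is assumed to be configured as described for the hook:

    from datetime import datetime

    from airflow import DAG
    from airflow_unicore_integration.operators.unicore_operators import (
        UnicoreDateOperator,
        UnicoreScriptOperator,
    )

    with DAG(dag_id="unicore_example", start_date=datetime(2024, 1, 1), schedule=None, catchup=False):
        date_task = UnicoreDateOperator(task_id="print_date", name="print_date")
        script_task = UnicoreScriptOperator(
            task_id="run_script",
            name="run_script",
            script_content="#!/bin/bash\necho hello from UNICORE\nhostname",
        )
        date_task >> script_task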
@@ -0,0 +1,17 @@
+ Metadata-Version: 2.1
+ Name: airflow-unicore-integration
+ Version: 0.0.4
+ Summary: Running Unicore Jobs from airflow DAGs.
+ Author-email: Christian Böttcher <c.boettcher@fz-juelich.de>
+ Project-URL: Homepage, https://gitlab.jsc.fz-juelich.de/boettcher1/airflow_unicore_integration
+ Project-URL: Issues, https://gitlab.jsc.fz-juelich.de/boettcher1/airflow_unicore_integration/-/issues
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Framework :: Apache Airflow :: Provider
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ Requires-Dist: pyunicore >=1.0.0
+ Requires-Dist: apache-airflow ==2.8.4
+
@@ -0,0 +1,10 @@
+ airflow_unicore_integration/__init__.py,sha256=bh8G7mutfVy8umYxhoMZLBLufNxZCp44RxekRw2y4cw,395
+ airflow_unicore_integration/hooks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ airflow_unicore_integration/hooks/unicore_hooks.py,sha256=3sXaVYNhd6w-DdWS0VmtM0e037lUcRvUOuNRWz_bCYk,1768
+ airflow_unicore_integration/operators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ airflow_unicore_integration/operators/unicore_operators.py,sha256=1o9hhEJTcrSYuQ4AexWLXodylZwNerP9zpqgakJ7ZIk,16445
+ airflow_unicore_integration-0.0.4.dist-info/METADATA,sha256=R6KzqZ1Bng1AjIqSzsjJDK-xCAQdKm92CiymwsL-Hk0,756
+ airflow_unicore_integration-0.0.4.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
+ airflow_unicore_integration-0.0.4.dist-info/entry_points.txt,sha256=bmk1b2a4vYfZ2P4fbcPVSOLEThgKHSHftgjR82XiG6A,88
+ airflow_unicore_integration-0.0.4.dist-info/top_level.txt,sha256=j45X-uIuOk3oL78iwlpHakMWtUkg__B7zUlJLwmZx6w,28
+ airflow_unicore_integration-0.0.4.dist-info/RECORD,,
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (70.1.1)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
@@ -0,0 +1,2 @@
+ [apache_airflow_provider]
+ provider_info = airflow_unicore_integration:get_provider_info
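The apache_airflow_provider entry point is how Airflow discovers get_provider_info at runtime. A quick sketch to verify the entry point after installing the wheel (Python 3.10+, matching Requires-Python above):

    from importlib.metadata import entry_points

    for ep in entry_points(group="apache_airflow_provider"):
        print(ep.name, ep.value)  # expected: provider_info airflow_unicore_integration:get_provider_info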
@@ -0,0 +1 @@
+ airflow_unicore_integration