apache-airflow-providers-cncf-kubernetes 3.1.0__py3-none-any.whl → 10.10.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/cncf/kubernetes/__init__.py +18 -23
- airflow/providers/cncf/kubernetes/backcompat/__init__.py +17 -0
- airflow/providers/cncf/kubernetes/backcompat/backwards_compat_converters.py +31 -49
- airflow/providers/cncf/kubernetes/callbacks.py +200 -0
- airflow/providers/cncf/kubernetes/cli/__init__.py +16 -0
- airflow/providers/cncf/kubernetes/cli/kubernetes_command.py +195 -0
- airflow/providers/cncf/kubernetes/decorators/kubernetes.py +163 -0
- airflow/providers/cncf/kubernetes/decorators/kubernetes_cmd.py +118 -0
- airflow/providers/cncf/kubernetes/exceptions.py +37 -0
- airflow/providers/cncf/kubernetes/executors/__init__.py +17 -0
- airflow/providers/cncf/kubernetes/executors/kubernetes_executor.py +831 -0
- airflow/providers/cncf/kubernetes/executors/kubernetes_executor_types.py +91 -0
- airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py +736 -0
- airflow/providers/cncf/kubernetes/executors/local_kubernetes_executor.py +306 -0
- airflow/providers/cncf/kubernetes/get_provider_info.py +249 -50
- airflow/providers/cncf/kubernetes/hooks/kubernetes.py +846 -112
- airflow/providers/cncf/kubernetes/k8s_model.py +62 -0
- airflow/providers/cncf/kubernetes/kube_client.py +156 -0
- airflow/providers/cncf/kubernetes/kube_config.py +125 -0
- airflow/providers/cncf/kubernetes/kubernetes_executor_templates/__init__.py +16 -0
- airflow/providers/cncf/kubernetes/kubernetes_executor_templates/basic_template.yaml +79 -0
- airflow/providers/cncf/kubernetes/kubernetes_helper_functions.py +165 -0
- airflow/providers/cncf/kubernetes/operators/custom_object_launcher.py +368 -0
- airflow/providers/cncf/kubernetes/operators/job.py +646 -0
- airflow/providers/cncf/kubernetes/operators/kueue.py +132 -0
- airflow/providers/cncf/kubernetes/operators/pod.py +1417 -0
- airflow/providers/cncf/kubernetes/operators/resource.py +191 -0
- airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py +336 -35
- airflow/providers/cncf/kubernetes/pod_generator.py +592 -0
- airflow/providers/cncf/kubernetes/pod_template_file_examples/__init__.py +16 -0
- airflow/providers/cncf/kubernetes/pod_template_file_examples/dags_in_image_template.yaml +68 -0
- airflow/providers/cncf/kubernetes/pod_template_file_examples/dags_in_volume_template.yaml +74 -0
- airflow/providers/cncf/kubernetes/pod_template_file_examples/git_sync_template.yaml +95 -0
- airflow/providers/cncf/kubernetes/python_kubernetes_script.jinja2 +51 -0
- airflow/providers/cncf/kubernetes/python_kubernetes_script.py +92 -0
- airflow/providers/cncf/kubernetes/resource_convert/__init__.py +16 -0
- airflow/providers/cncf/kubernetes/resource_convert/configmap.py +52 -0
- airflow/providers/cncf/kubernetes/resource_convert/env_variable.py +39 -0
- airflow/providers/cncf/kubernetes/resource_convert/secret.py +40 -0
- airflow/providers/cncf/kubernetes/secret.py +128 -0
- airflow/providers/cncf/kubernetes/sensors/spark_kubernetes.py +30 -14
- airflow/providers/cncf/kubernetes/template_rendering.py +81 -0
- airflow/providers/cncf/kubernetes/triggers/__init__.py +16 -0
- airflow/providers/cncf/kubernetes/triggers/job.py +176 -0
- airflow/providers/cncf/kubernetes/triggers/pod.py +344 -0
- airflow/providers/cncf/kubernetes/utils/__init__.py +3 -0
- airflow/providers/cncf/kubernetes/utils/container.py +118 -0
- airflow/providers/cncf/kubernetes/utils/delete_from.py +154 -0
- airflow/providers/cncf/kubernetes/utils/k8s_resource_iterator.py +46 -0
- airflow/providers/cncf/kubernetes/utils/pod_manager.py +887 -152
- airflow/providers/cncf/kubernetes/utils/xcom_sidecar.py +25 -16
- airflow/providers/cncf/kubernetes/version_compat.py +38 -0
- apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/METADATA +125 -0
- apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/RECORD +62 -0
- {apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info}/WHEEL +1 -2
- apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/entry_points.txt +3 -0
- apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/cncf/kubernetes/backcompat/pod.py +0 -119
- airflow/providers/cncf/kubernetes/backcompat/pod_runtime_info_env.py +0 -56
- airflow/providers/cncf/kubernetes/backcompat/volume.py +0 -62
- airflow/providers/cncf/kubernetes/backcompat/volume_mount.py +0 -58
- airflow/providers/cncf/kubernetes/example_dags/example_kubernetes.py +0 -163
- airflow/providers/cncf/kubernetes/example_dags/example_spark_kubernetes.py +0 -66
- airflow/providers/cncf/kubernetes/example_dags/example_spark_kubernetes_spark_pi.yaml +0 -57
- airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py +0 -622
- apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/METADATA +0 -452
- apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/NOTICE +0 -6
- apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/RECORD +0 -29
- apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/entry_points.txt +0 -3
- apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/top_level.txt +0 -1
- /airflow/providers/cncf/kubernetes/{example_dags → decorators}/__init__.py +0 -0
- {apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/licenses}/LICENSE +0 -0
|
@@ -14,27 +14,54 @@
|
|
|
14
14
|
# KIND, either express or implied. See the License for the
|
|
15
15
|
# specific language governing permissions and limitations
|
|
16
16
|
# under the License.
|
|
17
|
-
import
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import contextlib
|
|
21
|
+
import json
|
|
18
22
|
import tempfile
|
|
19
|
-
from
|
|
23
|
+
from collections.abc import Generator
|
|
24
|
+
from functools import cached_property
|
|
25
|
+
from time import sleep
|
|
26
|
+
from typing import TYPE_CHECKING, Any, Protocol
|
|
27
|
+
|
|
28
|
+
import aiofiles
|
|
29
|
+
import requests
|
|
30
|
+
import tenacity
|
|
31
|
+
from asgiref.sync import sync_to_async
|
|
32
|
+
from kubernetes import client, config, utils, watch
|
|
33
|
+
from kubernetes.client.models import V1Deployment
|
|
34
|
+
from kubernetes.config import ConfigException
|
|
35
|
+
from kubernetes_asyncio import client as async_client, config as async_config
|
|
36
|
+
from urllib3.exceptions import HTTPError
|
|
20
37
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
38
|
+
from airflow.exceptions import AirflowException, AirflowNotFoundException
|
|
39
|
+
from airflow.models import Connection
|
|
40
|
+
from airflow.providers.cncf.kubernetes.exceptions import KubernetesApiError, KubernetesApiPermissionError
|
|
41
|
+
from airflow.providers.cncf.kubernetes.kube_client import _disable_verify_ssl, _enable_tcp_keepalive
|
|
42
|
+
from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import should_retry_creation
|
|
43
|
+
from airflow.providers.cncf.kubernetes.utils.container import (
|
|
44
|
+
container_is_completed,
|
|
45
|
+
container_is_running,
|
|
46
|
+
)
|
|
47
|
+
from airflow.providers.common.compat.sdk import BaseHook
|
|
48
|
+
from airflow.utils import yaml
|
|
25
49
|
|
|
26
|
-
|
|
50
|
+
if TYPE_CHECKING:
|
|
51
|
+
from kubernetes.client import V1JobList
|
|
52
|
+
from kubernetes.client.models import CoreV1EventList, V1Job, V1Pod
|
|
27
53
|
|
|
28
|
-
|
|
29
|
-
import airflow.utils.yaml as yaml
|
|
30
|
-
except ImportError:
|
|
31
|
-
import yaml # type: ignore[no-redef]
|
|
54
|
+
LOADING_KUBE_CONFIG_FILE_RESOURCE = "Loading Kubernetes configuration file kube_config from {}..."
|
|
32
55
|
|
|
33
|
-
|
|
34
|
-
|
|
56
|
+
JOB_FINAL_STATUS_CONDITION_TYPES = {
|
|
57
|
+
"Complete",
|
|
58
|
+
"Failed",
|
|
59
|
+
}
|
|
35
60
|
|
|
61
|
+
JOB_STATUS_CONDITION_TYPES = JOB_FINAL_STATUS_CONDITION_TYPES | {"Suspended"}
|
|
36
62
|
|
|
37
|
-
|
|
63
|
+
|
|
64
|
+
def _load_body_to_dict(body: str) -> dict:
|
|
38
65
|
try:
|
|
39
66
|
body_dict = yaml.safe_load(body)
|
|
40
67
|
except yaml.YAMLError as e:
|
|
@@ -42,14 +69,44 @@ def _load_body_to_dict(body):
|
|
|
42
69
|
return body_dict
|
|
43
70
|
|
|
44
71
|
|
|
45
|
-
class
|
|
72
|
+
class PodOperatorHookProtocol(Protocol):
|
|
73
|
+
"""
|
|
74
|
+
Protocol to define methods relied upon by KubernetesPodOperator.
|
|
75
|
+
|
|
76
|
+
Subclasses of KubernetesPodOperator, such as GKEStartPodOperator, may use
|
|
77
|
+
hooks that don't extend KubernetesHook. We use this protocol to document the
|
|
78
|
+
methods used by KPO and ensure that these methods exist on such other hooks.
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
@property
|
|
82
|
+
def core_v1_client(self) -> client.CoreV1Api:
|
|
83
|
+
"""Get authenticated client object."""
|
|
84
|
+
|
|
85
|
+
@property
|
|
86
|
+
def is_in_cluster(self) -> bool:
|
|
87
|
+
"""Expose whether the hook is configured with ``load_incluster_config`` or not."""
|
|
88
|
+
|
|
89
|
+
def get_pod(self, name: str, namespace: str) -> V1Pod:
|
|
90
|
+
"""Read pod object from kubernetes API."""
|
|
91
|
+
|
|
92
|
+
def get_namespace(self) -> str | None:
|
|
93
|
+
"""Return the namespace that defined in the connection."""
|
|
94
|
+
|
|
95
|
+
def get_xcom_sidecar_container_image(self) -> str | None:
|
|
96
|
+
"""Return the xcom sidecar image that defined in the connection."""
|
|
97
|
+
|
|
98
|
+
def get_xcom_sidecar_container_resources(self) -> str | None:
|
|
99
|
+
"""Return the xcom sidecar resources that defined in the connection."""
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class KubernetesHook(BaseHook, PodOperatorHookProtocol):
|
|
46
103
|
"""
|
|
47
104
|
Creates Kubernetes API connection.
|
|
48
105
|
|
|
49
|
-
- use in cluster configuration by using ``
|
|
50
|
-
- use custom config by providing path to the file using ``
|
|
106
|
+
- use in cluster configuration by using extra field ``in_cluster`` in connection
|
|
107
|
+
- use custom config by providing path to the file using extra field ``kube_config_path`` in connection
|
|
51
108
|
- use custom configuration by providing content of kubeconfig file via
|
|
52
|
-
``
|
|
109
|
+
extra field ``kube_config`` in connection
|
|
53
110
|
- use default config by providing no extras
|
|
54
111
|
|
|
55
112
|
This hook check for configuration option in the above order. Once an option is present it will
|
|
@@ -61,58 +118,79 @@ class KubernetesHook(BaseHook):
|
|
|
61
118
|
|
|
62
119
|
:param conn_id: The :ref:`kubernetes connection <howto/connection:kubernetes>`
|
|
63
120
|
to Kubernetes cluster.
|
|
121
|
+
:param client_configuration: Optional dictionary of client configuration params.
|
|
122
|
+
Passed on to kubernetes client.
|
|
123
|
+
:param cluster_context: Optionally specify a context to use (e.g. if you have multiple
|
|
124
|
+
in your kubeconfig.
|
|
125
|
+
:param config_file: Path to kubeconfig file.
|
|
126
|
+
:param config_dict: Takes the config file as a dict.
|
|
127
|
+
:param in_cluster: Set to ``True`` if running from within a kubernetes cluster.
|
|
128
|
+
:param disable_verify_ssl: Set to ``True`` if SSL verification should be disabled.
|
|
129
|
+
:param disable_tcp_keepalive: Set to ``True`` if you want to disable keepalive logic.
|
|
64
130
|
"""
|
|
65
131
|
|
|
66
|
-
conn_name_attr =
|
|
67
|
-
default_conn_name =
|
|
68
|
-
conn_type =
|
|
69
|
-
hook_name =
|
|
132
|
+
conn_name_attr = "kubernetes_conn_id"
|
|
133
|
+
default_conn_name = "kubernetes_default"
|
|
134
|
+
conn_type = "kubernetes"
|
|
135
|
+
hook_name = "Kubernetes Cluster Connection"
|
|
70
136
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
137
|
+
DEFAULT_NAMESPACE = "default"
|
|
138
|
+
|
|
139
|
+
@classmethod
|
|
140
|
+
def get_connection_form_widgets(cls) -> dict[str, Any]:
|
|
141
|
+
"""Return connection widgets to add to connection form."""
|
|
142
|
+
from flask_appbuilder.fieldwidgets import BS3PasswordFieldWidget, BS3TextFieldWidget
|
|
75
143
|
from flask_babel import lazy_gettext
|
|
76
|
-
from wtforms import BooleanField, StringField
|
|
144
|
+
from wtforms import BooleanField, PasswordField, StringField
|
|
77
145
|
|
|
78
146
|
return {
|
|
79
|
-
"
|
|
80
|
-
"
|
|
81
|
-
|
|
147
|
+
"in_cluster": BooleanField(lazy_gettext("In cluster configuration")),
|
|
148
|
+
"kube_config_path": StringField(lazy_gettext("Kube config path"), widget=BS3TextFieldWidget()),
|
|
149
|
+
"kube_config": PasswordField(
|
|
150
|
+
lazy_gettext("Kube config (JSON format)"), widget=BS3PasswordFieldWidget()
|
|
82
151
|
),
|
|
83
|
-
"
|
|
84
|
-
|
|
152
|
+
"namespace": StringField(lazy_gettext("Namespace"), widget=BS3TextFieldWidget()),
|
|
153
|
+
"cluster_context": StringField(lazy_gettext("Cluster context"), widget=BS3TextFieldWidget()),
|
|
154
|
+
"disable_verify_ssl": BooleanField(lazy_gettext("Disable SSL")),
|
|
155
|
+
"disable_tcp_keepalive": BooleanField(lazy_gettext("Disable TCP keepalive")),
|
|
156
|
+
"xcom_sidecar_container_image": StringField(
|
|
157
|
+
lazy_gettext("XCom sidecar image"), widget=BS3TextFieldWidget()
|
|
85
158
|
),
|
|
86
|
-
"
|
|
87
|
-
lazy_gettext(
|
|
88
|
-
),
|
|
89
|
-
"extra__kubernetes__cluster_context": StringField(
|
|
90
|
-
lazy_gettext('Cluster context'), widget=BS3TextFieldWidget()
|
|
159
|
+
"xcom_sidecar_container_resources": StringField(
|
|
160
|
+
lazy_gettext("XCom sidecar resources (JSON format)"), widget=BS3TextFieldWidget()
|
|
91
161
|
),
|
|
92
162
|
}
|
|
93
163
|
|
|
94
|
-
@
|
|
95
|
-
def get_ui_field_behaviour() ->
|
|
96
|
-
"""
|
|
164
|
+
@classmethod
|
|
165
|
+
def get_ui_field_behaviour(cls) -> dict[str, Any]:
|
|
166
|
+
"""Return custom field behaviour."""
|
|
97
167
|
return {
|
|
98
|
-
"hidden_fields": [
|
|
168
|
+
"hidden_fields": ["host", "schema", "login", "password", "port", "extra"],
|
|
99
169
|
"relabeling": {},
|
|
100
170
|
}
|
|
101
171
|
|
|
102
172
|
def __init__(
|
|
103
173
|
self,
|
|
104
|
-
conn_id:
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
174
|
+
conn_id: str | None = None,
|
|
175
|
+
kubernetes_conn_id: str | None = default_conn_name,
|
|
176
|
+
client_configuration: client.Configuration | None = None,
|
|
177
|
+
cluster_context: str | None = None,
|
|
178
|
+
config_file: str | None = None,
|
|
179
|
+
config_dict: dict | None = None,
|
|
180
|
+
in_cluster: bool | None = None,
|
|
181
|
+
disable_verify_ssl: bool | None = None,
|
|
182
|
+
disable_tcp_keepalive: bool | None = None,
|
|
109
183
|
) -> None:
|
|
110
184
|
super().__init__()
|
|
111
|
-
self.conn_id = conn_id
|
|
185
|
+
self.conn_id = conn_id or kubernetes_conn_id
|
|
112
186
|
self.client_configuration = client_configuration
|
|
113
187
|
self.cluster_context = cluster_context
|
|
114
188
|
self.config_file = config_file
|
|
189
|
+
self.config_dict = config_dict
|
|
115
190
|
self.in_cluster = in_cluster
|
|
191
|
+
self.disable_verify_ssl = disable_verify_ssl
|
|
192
|
+
self.disable_tcp_keepalive = disable_tcp_keepalive
|
|
193
|
+
self._is_in_cluster: bool | None = None
|
|
116
194
|
|
|
117
195
|
@staticmethod
|
|
118
196
|
def _coalesce_param(*params):
|
|
@@ -120,38 +198,86 @@ class KubernetesHook(BaseHook):
|
|
|
120
198
|
if param is not None:
|
|
121
199
|
return param
|
|
122
200
|
|
|
123
|
-
|
|
124
|
-
|
|
201
|
+
@classmethod
|
|
202
|
+
def get_connection(cls, conn_id: str) -> Connection:
|
|
203
|
+
"""
|
|
204
|
+
Return requested connection.
|
|
205
|
+
|
|
206
|
+
If missing and conn_id is "kubernetes_default", will return empty connection so that hook will
|
|
207
|
+
default to cluster-derived credentials.
|
|
208
|
+
"""
|
|
209
|
+
try:
|
|
210
|
+
return super().get_connection(conn_id) # type: ignore[return-value]
|
|
211
|
+
except AirflowNotFoundException:
|
|
212
|
+
if conn_id == cls.default_conn_name:
|
|
213
|
+
return Connection(conn_id=cls.default_conn_name)
|
|
214
|
+
raise
|
|
215
|
+
|
|
216
|
+
@cached_property
|
|
217
|
+
def conn_extras(self):
|
|
125
218
|
if self.conn_id:
|
|
126
219
|
connection = self.get_connection(self.conn_id)
|
|
127
220
|
extras = connection.extra_dejson
|
|
128
221
|
else:
|
|
129
222
|
extras = {}
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
223
|
+
return extras
|
|
224
|
+
|
|
225
|
+
def _get_field(self, field_name):
|
|
226
|
+
"""
|
|
227
|
+
Handle backcompat for extra fields.
|
|
228
|
+
|
|
229
|
+
Prior to Airflow 2.3, in order to make use of UI customizations for extra fields,
|
|
230
|
+
we needed to store them with the prefix ``extra__kubernetes__``. This method
|
|
231
|
+
handles the backcompat, i.e. if the extra dict contains prefixed fields.
|
|
232
|
+
"""
|
|
233
|
+
if field_name.startswith("extra__"):
|
|
234
|
+
raise ValueError(
|
|
235
|
+
f"Got prefixed name {field_name}; please remove the 'extra__kubernetes__' prefix "
|
|
236
|
+
f"when using this method."
|
|
237
|
+
)
|
|
238
|
+
if field_name in self.conn_extras:
|
|
239
|
+
return self.conn_extras[field_name] or None
|
|
240
|
+
prefixed_name = f"extra__kubernetes__{field_name}"
|
|
241
|
+
return self.conn_extras.get(prefixed_name) or None
|
|
242
|
+
|
|
243
|
+
def get_conn(self) -> client.ApiClient:
|
|
244
|
+
"""Return kubernetes api session for use with requests."""
|
|
245
|
+
in_cluster = self._coalesce_param(self.in_cluster, self._get_field("in_cluster"))
|
|
246
|
+
cluster_context = self._coalesce_param(self.cluster_context, self._get_field("cluster_context"))
|
|
247
|
+
kubeconfig_path = self._coalesce_param(self.config_file, self._get_field("kube_config_path"))
|
|
248
|
+
kubeconfig = self._get_field("kube_config")
|
|
249
|
+
num_selected_configuration = sum(
|
|
250
|
+
1 for o in [in_cluster, kubeconfig, kubeconfig_path, self.config_dict] if o
|
|
138
251
|
)
|
|
139
|
-
kubeconfig = extras.get("extra__kubernetes__kube_config") or None
|
|
140
|
-
num_selected_configuration = len([o for o in [in_cluster, kubeconfig, kubeconfig_path] if o])
|
|
141
252
|
|
|
142
253
|
if num_selected_configuration > 1:
|
|
143
254
|
raise AirflowException(
|
|
144
255
|
"Invalid connection configuration. Options kube_config_path, "
|
|
145
|
-
"kube_config, in_cluster are mutually exclusive. "
|
|
256
|
+
"kube_config, in_cluster, config_dict are mutually exclusive. "
|
|
146
257
|
"You can only use one option at a time."
|
|
147
258
|
)
|
|
259
|
+
|
|
260
|
+
disable_verify_ssl = self._coalesce_param(
|
|
261
|
+
self.disable_verify_ssl, _get_bool(self._get_field("disable_verify_ssl"))
|
|
262
|
+
)
|
|
263
|
+
disable_tcp_keepalive = self._coalesce_param(
|
|
264
|
+
self.disable_tcp_keepalive, _get_bool(self._get_field("disable_tcp_keepalive"))
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
if disable_verify_ssl is True:
|
|
268
|
+
_disable_verify_ssl()
|
|
269
|
+
if disable_tcp_keepalive is not True:
|
|
270
|
+
_enable_tcp_keepalive()
|
|
271
|
+
|
|
148
272
|
if in_cluster:
|
|
149
273
|
self.log.debug("loading kube_config from: in_cluster configuration")
|
|
274
|
+
self._is_in_cluster = True
|
|
150
275
|
config.load_incluster_config()
|
|
151
276
|
return client.ApiClient()
|
|
152
277
|
|
|
153
278
|
if kubeconfig_path is not None:
|
|
154
279
|
self.log.debug("loading kube_config from: %s", kubeconfig_path)
|
|
280
|
+
self._is_in_cluster = False
|
|
155
281
|
config.load_kube_config(
|
|
156
282
|
config_file=kubeconfig_path,
|
|
157
283
|
client_configuration=self.client_configuration,
|
|
@@ -162,8 +288,11 @@ class KubernetesHook(BaseHook):
|
|
|
162
288
|
if kubeconfig is not None:
|
|
163
289
|
with tempfile.NamedTemporaryFile() as temp_config:
|
|
164
290
|
self.log.debug("loading kube_config from: connection kube_config")
|
|
291
|
+
if isinstance(kubeconfig, dict):
|
|
292
|
+
kubeconfig = json.dumps(kubeconfig)
|
|
165
293
|
temp_config.write(kubeconfig.encode())
|
|
166
294
|
temp_config.flush()
|
|
295
|
+
self._is_in_cluster = False
|
|
167
296
|
config.load_kube_config(
|
|
168
297
|
config_file=temp_config.name,
|
|
169
298
|
client_configuration=self.client_configuration,
|
|
@@ -171,27 +300,71 @@ class KubernetesHook(BaseHook):
|
|
|
171
300
|
)
|
|
172
301
|
return client.ApiClient()
|
|
173
302
|
|
|
174
|
-
self.
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
303
|
+
if self.config_dict:
|
|
304
|
+
self.log.debug(LOADING_KUBE_CONFIG_FILE_RESOURCE.format("config dictionary"))
|
|
305
|
+
self._is_in_cluster = False
|
|
306
|
+
config.load_kube_config_from_dict(
|
|
307
|
+
config_dict=self.config_dict,
|
|
308
|
+
client_configuration=self.client_configuration,
|
|
309
|
+
context=cluster_context,
|
|
310
|
+
)
|
|
311
|
+
return client.ApiClient()
|
|
312
|
+
|
|
313
|
+
return self._get_default_client(cluster_context=cluster_context)
|
|
314
|
+
|
|
315
|
+
def _get_default_client(self, *, cluster_context: str | None = None) -> client.ApiClient:
|
|
316
|
+
# if we get here, then no configuration has been supplied
|
|
317
|
+
# we should try in_cluster since that's most likely
|
|
318
|
+
# but failing that just load assuming a kubeconfig file
|
|
319
|
+
# in the default location
|
|
320
|
+
try:
|
|
321
|
+
config.load_incluster_config(client_configuration=self.client_configuration)
|
|
322
|
+
self._is_in_cluster = True
|
|
323
|
+
except ConfigException:
|
|
324
|
+
self.log.debug("loading kube_config from: default file")
|
|
325
|
+
self._is_in_cluster = False
|
|
326
|
+
config.load_kube_config(
|
|
327
|
+
client_configuration=self.client_configuration,
|
|
328
|
+
context=cluster_context,
|
|
329
|
+
)
|
|
179
330
|
return client.ApiClient()
|
|
180
331
|
|
|
332
|
+
@property
|
|
333
|
+
def is_in_cluster(self) -> bool:
|
|
334
|
+
"""Expose whether the hook is configured with ``load_incluster_config`` or not."""
|
|
335
|
+
if self._is_in_cluster is not None:
|
|
336
|
+
return self._is_in_cluster
|
|
337
|
+
self.api_client # so we can determine if we are in_cluster or not
|
|
338
|
+
if TYPE_CHECKING:
|
|
339
|
+
assert self._is_in_cluster is not None
|
|
340
|
+
return self._is_in_cluster
|
|
341
|
+
|
|
181
342
|
@cached_property
|
|
182
|
-
def api_client(self) ->
|
|
183
|
-
"""Cached Kubernetes API client"""
|
|
343
|
+
def api_client(self) -> client.ApiClient:
|
|
344
|
+
"""Cached Kubernetes API client."""
|
|
184
345
|
return self.get_conn()
|
|
185
346
|
|
|
186
347
|
@cached_property
|
|
187
|
-
def core_v1_client(self):
|
|
348
|
+
def core_v1_client(self) -> client.CoreV1Api:
|
|
188
349
|
return client.CoreV1Api(api_client=self.api_client)
|
|
189
350
|
|
|
351
|
+
@cached_property
|
|
352
|
+
def apps_v1_client(self) -> client.AppsV1Api:
|
|
353
|
+
return client.AppsV1Api(api_client=self.api_client)
|
|
354
|
+
|
|
355
|
+
@cached_property
|
|
356
|
+
def custom_object_client(self) -> client.CustomObjectsApi:
|
|
357
|
+
return client.CustomObjectsApi(api_client=self.api_client)
|
|
358
|
+
|
|
359
|
+
@cached_property
|
|
360
|
+
def batch_v1_client(self) -> client.BatchV1Api:
|
|
361
|
+
return client.BatchV1Api(api_client=self.api_client)
|
|
362
|
+
|
|
190
363
|
def create_custom_object(
|
|
191
|
-
self, group: str, version: str, plural: str, body:
|
|
364
|
+
self, group: str, version: str, plural: str, body: str | dict, namespace: str | None = None
|
|
192
365
|
):
|
|
193
366
|
"""
|
|
194
|
-
|
|
367
|
+
Create custom resource definition object in Kubernetes.
|
|
195
368
|
|
|
196
369
|
:param group: api group
|
|
197
370
|
:param version: api version
|
|
@@ -199,25 +372,29 @@ class KubernetesHook(BaseHook):
|
|
|
199
372
|
:param body: crd object definition
|
|
200
373
|
:param namespace: kubernetes namespace
|
|
201
374
|
"""
|
|
202
|
-
api
|
|
203
|
-
|
|
204
|
-
namespace = self.get_namespace()
|
|
375
|
+
api: client.CustomObjectsApi = self.custom_object_client
|
|
376
|
+
|
|
205
377
|
if isinstance(body, str):
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
378
|
+
body_dict = _load_body_to_dict(body)
|
|
379
|
+
else:
|
|
380
|
+
body_dict = body
|
|
381
|
+
|
|
382
|
+
response = api.create_namespaced_custom_object(
|
|
383
|
+
group=group,
|
|
384
|
+
version=version,
|
|
385
|
+
namespace=namespace or self.get_namespace() or self.DEFAULT_NAMESPACE,
|
|
386
|
+
plural=plural,
|
|
387
|
+
body=body_dict,
|
|
388
|
+
)
|
|
389
|
+
|
|
390
|
+
self.log.debug("Response: %s", response)
|
|
391
|
+
return response
|
|
215
392
|
|
|
216
393
|
def get_custom_object(
|
|
217
|
-
self, group: str, version: str, plural: str, name: str, namespace:
|
|
394
|
+
self, group: str, version: str, plural: str, name: str, namespace: str | None = None
|
|
218
395
|
):
|
|
219
396
|
"""
|
|
220
|
-
Get custom resource definition object from Kubernetes
|
|
397
|
+
Get custom resource definition object from Kubernetes.
|
|
221
398
|
|
|
222
399
|
:param group: api group
|
|
223
400
|
:param version: api version
|
|
@@ -226,67 +403,624 @@ class KubernetesHook(BaseHook):
|
|
|
226
403
|
:param namespace: kubernetes namespace
|
|
227
404
|
"""
|
|
228
405
|
api = client.CustomObjectsApi(self.api_client)
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
406
|
+
response = api.get_namespaced_custom_object(
|
|
407
|
+
group=group,
|
|
408
|
+
version=version,
|
|
409
|
+
namespace=namespace or self.get_namespace() or self.DEFAULT_NAMESPACE,
|
|
410
|
+
plural=plural,
|
|
411
|
+
name=name,
|
|
412
|
+
)
|
|
413
|
+
return response
|
|
414
|
+
|
|
415
|
+
def delete_custom_object(
|
|
416
|
+
self, group: str, version: str, plural: str, name: str, namespace: str | None = None, **kwargs
|
|
417
|
+
):
|
|
418
|
+
"""
|
|
419
|
+
Delete custom resource definition object from Kubernetes.
|
|
420
|
+
|
|
421
|
+
:param group: api group
|
|
422
|
+
:param version: api version
|
|
423
|
+
:param plural: api plural
|
|
424
|
+
:param name: crd object name
|
|
425
|
+
:param namespace: kubernetes namespace
|
|
426
|
+
"""
|
|
427
|
+
api = client.CustomObjectsApi(self.api_client)
|
|
428
|
+
return api.delete_namespaced_custom_object(
|
|
429
|
+
group=group,
|
|
430
|
+
version=version,
|
|
431
|
+
namespace=namespace or self.get_namespace() or self.DEFAULT_NAMESPACE,
|
|
432
|
+
plural=plural,
|
|
433
|
+
name=name,
|
|
434
|
+
**kwargs,
|
|
435
|
+
)
|
|
238
436
|
|
|
239
|
-
def get_namespace(self) ->
|
|
240
|
-
"""
|
|
437
|
+
def get_namespace(self) -> str | None:
|
|
438
|
+
"""Return the namespace that defined in the connection."""
|
|
241
439
|
if self.conn_id:
|
|
242
|
-
|
|
243
|
-
extras = connection.extra_dejson
|
|
244
|
-
namespace = extras.get("extra__kubernetes__namespace", "default")
|
|
245
|
-
return namespace
|
|
440
|
+
return self._get_field("namespace")
|
|
246
441
|
return None
|
|
247
442
|
|
|
443
|
+
def get_xcom_sidecar_container_image(self):
|
|
444
|
+
"""Return the xcom sidecar image that defined in the connection."""
|
|
445
|
+
return self._get_field("xcom_sidecar_container_image")
|
|
446
|
+
|
|
447
|
+
def get_xcom_sidecar_container_resources(self):
|
|
448
|
+
"""Return the xcom sidecar resources that defined in the connection."""
|
|
449
|
+
field = self._get_field("xcom_sidecar_container_resources")
|
|
450
|
+
if not field:
|
|
451
|
+
return None
|
|
452
|
+
return json.loads(field)
|
|
453
|
+
|
|
248
454
|
def get_pod_log_stream(
|
|
249
455
|
self,
|
|
250
456
|
pod_name: str,
|
|
251
|
-
container:
|
|
252
|
-
namespace:
|
|
253
|
-
) ->
|
|
457
|
+
container: str | None = "",
|
|
458
|
+
namespace: str | None = None,
|
|
459
|
+
) -> tuple[watch.Watch, Generator[str, None, None]]:
|
|
254
460
|
"""
|
|
255
|
-
|
|
461
|
+
Retrieve a log stream for a container in a kubernetes pod.
|
|
256
462
|
|
|
257
463
|
:param pod_name: pod name
|
|
258
464
|
:param container: container name
|
|
259
465
|
:param namespace: kubernetes namespace
|
|
260
466
|
"""
|
|
261
|
-
api = client.CoreV1Api(self.api_client)
|
|
262
467
|
watcher = watch.Watch()
|
|
263
468
|
return (
|
|
264
469
|
watcher,
|
|
265
470
|
watcher.stream(
|
|
266
|
-
|
|
471
|
+
self.core_v1_client.read_namespaced_pod_log,
|
|
267
472
|
name=pod_name,
|
|
268
473
|
container=container,
|
|
269
|
-
namespace=namespace
|
|
474
|
+
namespace=namespace or self.get_namespace() or self.DEFAULT_NAMESPACE,
|
|
270
475
|
),
|
|
271
476
|
)
|
|
272
477
|
|
|
273
478
|
def get_pod_logs(
|
|
274
479
|
self,
|
|
275
480
|
pod_name: str,
|
|
276
|
-
container:
|
|
277
|
-
namespace:
|
|
481
|
+
container: str | None = "",
|
|
482
|
+
namespace: str | None = None,
|
|
278
483
|
):
|
|
279
484
|
"""
|
|
280
|
-
|
|
485
|
+
Retrieve a container's log from the specified pod.
|
|
281
486
|
|
|
282
487
|
:param pod_name: pod name
|
|
283
488
|
:param container: container name
|
|
284
489
|
:param namespace: kubernetes namespace
|
|
285
490
|
"""
|
|
286
|
-
|
|
287
|
-
return api.read_namespaced_pod_log(
|
|
491
|
+
return self.core_v1_client.read_namespaced_pod_log(
|
|
288
492
|
name=pod_name,
|
|
289
493
|
container=container,
|
|
290
494
|
_preload_content=False,
|
|
291
|
-
namespace=namespace
|
|
495
|
+
namespace=namespace or self.get_namespace() or self.DEFAULT_NAMESPACE,
|
|
496
|
+
)
|
|
497
|
+
|
|
498
|
+
def get_pod(self, name: str, namespace: str) -> V1Pod:
|
|
499
|
+
"""Read pod object from kubernetes API."""
|
|
500
|
+
return self.core_v1_client.read_namespaced_pod(
|
|
501
|
+
name=name,
|
|
502
|
+
namespace=namespace,
|
|
503
|
+
)
|
|
504
|
+
|
|
505
|
+
def get_namespaced_pod_list(
|
|
506
|
+
self,
|
|
507
|
+
label_selector: str | None = "",
|
|
508
|
+
namespace: str | None = None,
|
|
509
|
+
watch: bool = False,
|
|
510
|
+
**kwargs,
|
|
511
|
+
):
|
|
512
|
+
"""
|
|
513
|
+
Retrieve a list of Kind pod which belong default kubernetes namespace.
|
|
514
|
+
|
|
515
|
+
:param label_selector: A selector to restrict the list of returned objects by their labels
|
|
516
|
+
:param namespace: kubernetes namespace
|
|
517
|
+
:param watch: Watch for changes to the described resources and return them as a stream
|
|
518
|
+
"""
|
|
519
|
+
return self.core_v1_client.list_namespaced_pod(
|
|
520
|
+
namespace=namespace or self.get_namespace() or self.DEFAULT_NAMESPACE,
|
|
521
|
+
watch=watch,
|
|
522
|
+
label_selector=label_selector,
|
|
523
|
+
_preload_content=False,
|
|
524
|
+
**kwargs,
|
|
525
|
+
)
|
|
526
|
+
|
|
527
|
+
def get_deployment_status(
|
|
528
|
+
self,
|
|
529
|
+
name: str,
|
|
530
|
+
namespace: str = "default",
|
|
531
|
+
**kwargs,
|
|
532
|
+
) -> V1Deployment:
|
|
533
|
+
"""
|
|
534
|
+
Get status of existing Deployment.
|
|
535
|
+
|
|
536
|
+
:param name: Name of Deployment to retrieve
|
|
537
|
+
:param namespace: Deployment namespace
|
|
538
|
+
"""
|
|
539
|
+
return self.apps_v1_client.read_namespaced_deployment_status(
|
|
540
|
+
name=name, namespace=namespace, pretty=True, **kwargs
|
|
541
|
+
)
|
|
542
|
+
|
|
543
|
+
@tenacity.retry(
|
|
544
|
+
stop=tenacity.stop_after_attempt(3),
|
|
545
|
+
wait=tenacity.wait_random_exponential(),
|
|
546
|
+
reraise=True,
|
|
547
|
+
retry=tenacity.retry_if_exception(should_retry_creation),
|
|
548
|
+
)
|
|
549
|
+
def create_job(
|
|
550
|
+
self,
|
|
551
|
+
job: V1Job,
|
|
552
|
+
**kwargs,
|
|
553
|
+
) -> V1Job:
|
|
554
|
+
"""
|
|
555
|
+
Run Job.
|
|
556
|
+
|
|
557
|
+
:param job: A kubernetes Job object
|
|
558
|
+
"""
|
|
559
|
+
sanitized_job = self.batch_v1_client.api_client.sanitize_for_serialization(job)
|
|
560
|
+
json_job = json.dumps(sanitized_job, indent=2)
|
|
561
|
+
|
|
562
|
+
self.log.debug("Job Creation Request: \n%s", json_job)
|
|
563
|
+
try:
|
|
564
|
+
resp = self.batch_v1_client.create_namespaced_job(
|
|
565
|
+
body=sanitized_job, namespace=job.metadata.namespace, **kwargs
|
|
566
|
+
)
|
|
567
|
+
self.log.debug("Job Creation Response: %s", resp)
|
|
568
|
+
except Exception as e:
|
|
569
|
+
self.log.exception(
|
|
570
|
+
"Exception when attempting to create Namespaced Job: %s", str(json_job).replace("\n", " ")
|
|
571
|
+
)
|
|
572
|
+
raise e
|
|
573
|
+
return resp
|
|
574
|
+
|
|
575
|
+
def get_job(self, job_name: str, namespace: str) -> V1Job:
    """
    Fetch a Job by name from the given namespace.

    :param job_name: Name of the Job to read.
    :param namespace: Namespace that contains the Job.
    :return: Whatever ``read_namespaced_job`` returns.
    """
    batch_api = self.batch_v1_client
    job = batch_api.read_namespaced_job(name=job_name, namespace=namespace, pretty=True)
    return job
|
|
584
|
+
|
|
585
|
+
def get_job_status(self, job_name: str, namespace: str) -> V1Job:
    """
    Fetch a Job together with its status from the given namespace.

    :param job_name: Name of the Job to read.
    :param namespace: Namespace that contains the Job.
    :return: Whatever ``read_namespaced_job_status`` returns.
    """
    query = {"name": job_name, "namespace": namespace, "pretty": True}
    return self.batch_v1_client.read_namespaced_job_status(**query)
|
|
596
|
+
|
|
597
|
+
def wait_until_job_complete(self, job_name: str, namespace: str, job_poll_interval: float = 10) -> V1Job:
    """
    Poll a Job until ``is_job_complete`` reports it as finished.

    There is no timeout: the loop only ends once the Job is reported complete.

    :param job_name: Name of the Job to poll.
    :param namespace: Namespace that contains the Job.
    :param job_poll_interval: Seconds to sleep between two status requests.
    :return: The Job object from the poll that reported completion.
    """
    while True:
        self.log.info("Requesting status for the job '%s' ", job_name)
        current: V1Job = self.get_job_status(job_name=job_name, namespace=namespace)
        if not self.is_job_complete(job=current):
            self.log.info("The job '%s' is incomplete. Sleeping for %i sec.", job_name, job_poll_interval)
            sleep(job_poll_interval)
            continue
        return current
|
|
613
|
+
|
|
614
|
+
def list_jobs_all_namespaces(self) -> V1JobList:
    """
    List the Jobs of every namespace.

    :return: Whatever ``list_job_for_all_namespaces`` returns.
    """
    batch_api = self.batch_v1_client
    all_jobs = batch_api.list_job_for_all_namespaces(pretty=True)
    return all_jobs
|
|
621
|
+
|
|
622
|
+
def list_jobs_from_namespace(self, namespace: str) -> V1JobList:
    """
    List the Jobs of a single namespace.

    :param namespace: Namespace whose Jobs are listed.
    :return: Whatever ``list_namespaced_job`` returns.
    """
    batch_api = self.batch_v1_client
    namespace_jobs = batch_api.list_namespaced_job(namespace=namespace, pretty=True)
    return namespace_jobs
|
|
630
|
+
|
|
631
|
+
def is_job_complete(self, job: V1Job) -> bool:
    """
    Check whether the given job is complete (with success or fail).

    A job counts as complete when at least one of its conditions has a type listed in
    ``JOB_FINAL_STATUS_CONDITION_TYPES`` and that condition is actually set. The
    Kubernetes API reports a condition's ``status`` as the string ``"True"``,
    ``"False"`` or ``"Unknown"``, so the value is compared explicitly instead of being
    tested for truthiness (the non-empty string ``"False"`` is truthy).

    :param job: Job object to inspect; a missing ``status`` or ``conditions`` means
        "not complete".
    :return: Boolean indicating that the given job is complete.
    """
    status = job.status
    conditions = (status.conditions if status else None) or []
    final_conditions = [
        c
        for c in conditions
        # str(...) accepts both the API's "True" string and a plain boolean True.
        if c.type in JOB_FINAL_STATUS_CONDITION_TYPES and str(c.status).lower() == "true"
    ]
    if not final_conditions:
        return False

    plural = "s" if len(final_conditions) > 1 else ""
    self.log.info(
        "The job '%s' state%s: %s",
        job.metadata.name,
        plural,
        ", ".join(f"{c.type} at {c.last_transition_time}" for c in final_conditions),
    )
    return True
|
|
651
|
+
|
|
652
|
+
@staticmethod
|
|
653
|
+
def is_job_failed(job: V1Job) -> str | bool:
|
|
654
|
+
"""
|
|
655
|
+
Check whether the given job is failed.
|
|
656
|
+
|
|
657
|
+
:return: Error message if the job is failed, and False otherwise.
|
|
658
|
+
"""
|
|
659
|
+
if status := job.status:
|
|
660
|
+
conditions = status.conditions or []
|
|
661
|
+
if fail_condition := next((c for c in conditions if c.type == "Failed" and c.status), None):
|
|
662
|
+
return fail_condition.reason
|
|
663
|
+
return False
|
|
664
|
+
|
|
665
|
+
@staticmethod
|
|
666
|
+
def is_job_successful(job: V1Job) -> str | bool:
|
|
667
|
+
"""
|
|
668
|
+
Check whether the given job is completed successfully..
|
|
669
|
+
|
|
670
|
+
:return: Error message if the job is failed, and False otherwise.
|
|
671
|
+
"""
|
|
672
|
+
if status := job.status:
|
|
673
|
+
conditions = status.conditions or []
|
|
674
|
+
return bool(next((c for c in conditions if c.type == "Complete" and c.status), None))
|
|
675
|
+
return False
|
|
676
|
+
|
|
677
|
+
def patch_namespaced_job(self, job_name: str, namespace: str, body: object) -> V1Job:
    """
    Patch an existing Job.

    :param job_name: Name of the Job to patch.
    :param namespace: Namespace that contains the Job.
    :param body: Patch payload, handed to the client unchanged.
    :return: Whatever the client's ``patch_namespaced_job`` returns.
    """
    patch_args = {"name": job_name, "namespace": namespace, "body": body}
    return self.batch_v1_client.patch_namespaced_job(**patch_args)
|
|
690
|
+
|
|
691
|
+
def apply_from_yaml_file(
    self,
    api_client: Any = None,
    yaml_file: str | None = None,
    yaml_objects: list[dict] | None = None,
    verbose: bool = False,
    namespace: str = "default",
):
    """
    Create resources from YAML through ``utils.create_from_yaml``.

    :param api_client: Kubernetes client to use; the hook's own ``api_client`` is used
        when this is falsy.
    :param yaml_file: Path to the yaml file.
    :param yaml_objects: List of YAML objects; used instead of reading the yaml_file.
    :param verbose: If True, print confirmation from create action. Default is False.
    :param namespace: Namespace to create all resources inside; when falsy, the result
        of ``get_namespace()`` is used. The namespace must preexist otherwise the
        resource creation will fail.
    """
    k8s_client = api_client or self.api_client
    target_namespace = namespace or self.get_namespace()
    utils.create_from_yaml(
        k8s_client=k8s_client,
        yaml_objects=yaml_objects,
        yaml_file=yaml_file,
        verbose=verbose,
        namespace=target_namespace,
    )
|
|
716
|
+
|
|
717
|
+
def check_kueue_deployment_running(
    self, name: str, namespace: str, timeout: float = 300.0, polling_period_seconds: float = 2.0
) -> None:
    """
    Poll a Deployment until all of its replicas are ready.

    The Deployment counts as running once ``replicas`` and ``ready_replicas`` are both
    reported and equal, and ``unavailable_replicas`` is not reported.

    :param name: Name of the Deployment to poll.
    :param namespace: Namespace that contains the Deployment.
    :param timeout: Time budget; it is reduced by ``polling_period_seconds`` after each
        unsuccessful poll.
    :param polling_period_seconds: Seconds to sleep between two polls.
    :raises ValueError: If reading the Deployment status raises.
    :raises AirflowException: If the budget is used up before the Deployment is ready.
    """
    remaining = timeout
    while remaining > 0:
        try:
            deployment = self.get_deployment_status(name=name, namespace=namespace)
        except Exception as e:
            msg = "Exception occurred while checking for Deployment status."
            self.log.exception(msg)
            raise ValueError(msg) from e

        status_fields = V1Deployment.to_dict(deployment)["status"]
        total = status_fields["replicas"]
        ready = status_fields["ready_replicas"]
        unavailable = status_fields["unavailable_replicas"]

        counts_reported = total is not None and ready is not None
        if counts_reported and unavailable is None and total == ready:
            return

        self.log.info("Waiting until Deployment will be ready...")
        sleep(polling_period_seconds)

        remaining -= polling_period_seconds

    raise AirflowException("Deployment timed out")
|
|
746
|
+
|
|
747
|
+
@staticmethod
def get_yaml_content_from_file(kueue_yaml_url, timeout: float | None = 60.0) -> list[dict]:
    """
    Download content of YAML file and separate it into several dictionaries.

    :param kueue_yaml_url: URL of the (possibly multi-document) YAML file.
    :param timeout: Seconds to wait for the server before giving up; ``None`` waits
        indefinitely.
    :return: One entry per YAML document found in the response body.
    :raises AirflowException: If the response status code is not 200.
    """
    # requests applies no timeout unless one is passed, so an unresponsive server
    # would otherwise block the caller forever.
    response = requests.get(kueue_yaml_url, allow_redirects=True, timeout=timeout)
    if response.status_code != 200:
        raise AirflowException("Was not able to read the yaml file from given URL")

    return list(yaml.safe_load_all(response.text))
|
|
755
|
+
|
|
756
|
+
def test_connection(self):
    """
    Try to reach the cluster and report the outcome instead of raising.

    :return: ``(True, message with the version info)`` when the version endpoint could
        be queried, otherwise ``(False, text of the exception)``.
    """
    try:
        api_client = self.get_conn()
        version: client.VersionInfo = client.VersionApi(api_client).get_code()
        message = f"Connection successful. Version Info: {version.to_dict()}"
    except Exception as e:
        return False, str(e)
    return True, message
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
def _get_bool(val) -> bool | None:
    """
    Interpret ``val`` as a boolean only when that is unambiguous.

    Booleans are returned as they are; the strings "true" and "false" are recognised
    ignoring case and surrounding whitespace. Anything else yields None.
    """
    if isinstance(val, bool):
        return val
    if not isinstance(val, str):
        return None
    recognised = {"true": True, "false": False}
    return recognised.get(val.strip().lower())
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
class AsyncKubernetesHook(KubernetesHook):
|
|
778
|
+
"""Hook to use Kubernetes SDK asynchronously."""
|
|
779
|
+
|
|
780
|
+
def __init__(self, config_dict: dict | None = None, *args, **kwargs):
    """
    Set up the hook.

    :param config_dict: Optional kube config given as a dictionary.
    :param args: Positional arguments passed on to the parent initialiser.
    :param kwargs: Keyword arguments passed on to the parent initialiser.
    """
    super().__init__(*args, **kwargs)

    # Filled lazily by ``get_conn_extras``.
    self._extras: dict | None = None
    self.config_dict = config_dict
|
|
785
|
+
|
|
786
|
+
async def _load_config(self):
    """
    Load the Kubernetes client configuration and, in most cases, build an API client.

    The configuration source is picked in this order: in-cluster, ``config_dict``,
    the ``kube_config_path`` connection field, the ``kube_config`` connection field
    (written to a temporary file first), and finally the default kube config file.

    :return: A new ``async_client.ApiClient`` for every explicit source. When the
        default kube config file is used, the configuration is loaded but nothing is
        returned (implicit ``None``).
    :raises AirflowException: If more than one of in_cluster, kube_config,
        kube_config_path and config_dict is set.
    """
    # Hook attributes are combined with the connection fields via ``_coalesce_param``.
    in_cluster = self._coalesce_param(self.in_cluster, await self._get_field("in_cluster"))
    cluster_context = self._coalesce_param(self.cluster_context, await self._get_field("cluster_context"))
    kubeconfig_path = await self._get_field("kube_config_path")
    kubeconfig = await self._get_field("kube_config")
    # Number of truthy configuration sources; ``config_dict`` is counted as well.
    num_selected_configuration = sum(
        1 for o in [in_cluster, kubeconfig, kubeconfig_path, self.config_dict] if o
    )

    # Shared by the ``kube_config_path`` branch and the temporary-file branch below.
    async def api_client_from_kubeconfig_file(_kubeconfig_path: str | None):
        await async_config.load_kube_config(
            config_file=_kubeconfig_path,
            client_configuration=self.client_configuration,
            context=cluster_context,
        )
        return async_client.ApiClient()

    if num_selected_configuration > 1:
        raise AirflowException(
            "Invalid connection configuration. Options kube_config_path, "
            "kube_config, in_cluster are mutually exclusive. "
            "You can only use one option at a time."
        )

    if in_cluster:
        self.log.debug(LOADING_KUBE_CONFIG_FILE_RESOURCE.format("within a pod"))
        self._is_in_cluster = True
        # Not awaited, unlike the other loaders used in this method.
        async_config.load_incluster_config()
        return async_client.ApiClient()

    if self.config_dict:
        self.log.debug(LOADING_KUBE_CONFIG_FILE_RESOURCE.format("config dictionary"))
        self._is_in_cluster = False
        await async_config.load_kube_config_from_dict(self.config_dict, context=cluster_context)
        return async_client.ApiClient()

    if kubeconfig_path is not None:
        self.log.debug("loading kube_config from: %s", kubeconfig_path)
        self._is_in_cluster = False
        return await api_client_from_kubeconfig_file(kubeconfig_path)

    if kubeconfig is not None:
        # The loader takes a file path, so the inline config is written to a temporary
        # file; the client is built inside the ``async with`` block, while the
        # temporary file is still open.
        async with aiofiles.tempfile.NamedTemporaryFile() as temp_config:
            self.log.debug(
                "Reading kubernetes configuration file from connection "
                "object and writing temporary config file with its content",
            )
            await temp_config.write(kubeconfig.encode())
            await temp_config.flush()
            self._is_in_cluster = False
            return await api_client_from_kubeconfig_file(temp_config.name)
    # Fallback: default kube config file. Nothing is returned on this path.
    self.log.debug(LOADING_KUBE_CONFIG_FILE_RESOURCE.format("default configuration file"))
    await async_config.load_kube_config(
        client_configuration=self.client_configuration,
        context=cluster_context,
    )
|
|
843
|
+
|
|
844
|
+
async def get_conn_extras(self) -> dict:
    """
    Return the connection extras, loading them on first use.

    The result is kept in ``self._extras``; when the hook has no ``conn_id`` an empty
    dict is stored instead.
    """
    if self._extras is not None:
        return self._extras

    if not self.conn_id:
        self._extras = {}
    else:
        # ``get_connection`` is synchronous, hence the ``sync_to_async`` wrapper.
        connection = await sync_to_async(self.get_connection)(self.conn_id)
        self._extras = connection.extra_dejson
    return self._extras
|
|
852
|
+
|
|
853
|
+
async def _get_field(self, field_name):
    """
    Look up a field in the connection extras.

    The plain name is preferred; if it is absent, the ``extra__kubernetes__``-prefixed
    name is tried.

    :param field_name: Field name without the ``extra__kubernetes__`` prefix.
    :return: The stored value, or None when neither key exists.
    :raises ValueError: If ``field_name`` starts with ``extra__``.
    """
    if field_name.startswith("extra__"):
        raise ValueError(
            f"Got prefixed name {field_name}; please remove the 'extra__kubernetes__' prefix "
            "when using this method."
        )
    extras = await self.get_conn_extras()
    lookup_key = field_name if field_name in extras else f"extra__kubernetes__{field_name}"
    return extras.get(lookup_key)
|
|
864
|
+
|
|
865
|
+
@contextlib.asynccontextmanager
async def get_conn(self) -> async_client.ApiClient:
    """
    Yield an API client and close it when the ``async with`` block ends.

    The client comes from ``_load_config``; when that returns a falsy value a fresh
    ``async_client.ApiClient`` is created instead. Nothing is closed if the client
    could not be created.
    """
    api_client = None
    try:
        loaded_client = await self._load_config()
        api_client = loaded_client or async_client.ApiClient()
        yield api_client
    finally:
        if api_client is not None:
            await api_client.close()
|
|
874
|
+
|
|
875
|
+
async def get_pod(self, name: str, namespace: str) -> V1Pod:
    """
    Read a pod object.

    :param name: Name of the pod.
    :param namespace: Name of the pod's namespace.
    :return: Whatever ``read_namespaced_pod`` returns.
    :raises KubernetesApiPermissionError: On an ``HTTPError`` whose ``status`` is 403.
    :raises KubernetesApiError: On any other ``HTTPError``.
    """
    async with self.get_conn() as connection:
        try:
            core_api = async_client.CoreV1Api(connection)
            found: V1Pod = await core_api.read_namespaced_pod(name=name, namespace=namespace)
            return found
        except HTTPError as e:
            # Not every HTTPError carries a ``status`` attribute.
            if getattr(e, "status", None) == 403:
                raise KubernetesApiPermissionError("Permission denied (403) from Kubernetes API.") from e
            raise KubernetesApiError from e
|
|
894
|
+
|
|
895
|
+
async def delete_pod(self, name: str, namespace: str):
    """
    Delete a pod object.

    An ``ApiException`` with status 404 is swallowed, so deleting a pod that is
    already gone is not an error; any other ``ApiException`` is re-raised.

    :param name: Name of the pod.
    :param namespace: Name of the pod's namespace.
    """
    async with self.get_conn() as connection:
        try:
            core_api = async_client.CoreV1Api(connection)
            delete_options = client.V1DeleteOptions()
            await core_api.delete_namespaced_pod(name=name, namespace=namespace, body=delete_options)
        except async_client.ApiException as e:
            already_deleted = str(e.status) == "404"
            if not already_deleted:
                raise
|
|
912
|
+
|
|
913
|
+
async def read_logs(
    self, name: str, namespace: str, container_name: str | None = None, since_seconds: int | None = None
) -> list[str]:
    """
    Read the logs of a container inside the pod.

    The logs are requested once (``follow=False``) and every line carries its
    timestamp, so they can still be ordered after the pod has finished. The method is
    used for async output of the logs only if the pod failed its execution or the
    task was cancelled by the user.

    :param name: Name of the pod.
    :param namespace: Name of the pod's namespace.
    :param container_name: Name of the container inside the pod.
    :param since_seconds: Only return logs newer than a relative duration in seconds.
    :return: The log split into lines.
    :raises KubernetesApiError: If the request fails with an ``HTTPError``.
    """
    async with self.get_conn() as connection:
        try:
            v1_api = async_client.CoreV1Api(connection)
            logs = await v1_api.read_namespaced_pod_log(
                name=name,
                namespace=namespace,
                # The API keyword is ``container``; the generated client rejects
                # unknown keyword arguments such as ``container_name``.
                container=container_name,
                follow=False,
                timestamps=True,
                since_seconds=since_seconds,
            )
            return logs.splitlines()
        except HTTPError as e:
            raise KubernetesApiError from e
|
|
944
|
+
|
|
945
|
+
async def get_pod_events(self, name: str, namespace: str) -> CoreV1EventList:
    """
    List the events whose involved object has the given name.

    :param name: Name of the pod.
    :param namespace: Name of the pod's namespace.
    :return: Whatever ``list_namespaced_event`` returns.
    :raises KubernetesApiPermissionError: On an ``HTTPError`` whose ``status`` is 403.
    :raises KubernetesApiError: On any other ``HTTPError``.
    """
    async with self.get_conn() as connection:
        try:
            core_api = async_client.CoreV1Api(connection)
            selector = f"involvedObject.name={name}"
            pod_events: CoreV1EventList = await core_api.list_namespaced_event(
                field_selector=selector, namespace=namespace
            )
            return pod_events
        except HTTPError as e:
            # Not every HTTPError carries a ``status`` attribute.
            if getattr(e, "status", None) == 403:
                raise KubernetesApiPermissionError("Permission denied (403) from Kubernetes API.") from e
            raise KubernetesApiError from e
|
|
959
|
+
|
|
960
|
+
async def get_job_status(self, name: str, namespace: str) -> V1Job:
    """
    Read a Job together with its status.

    :param name: Name of the Job.
    :param namespace: Name of the Job's namespace.
    :return: Whatever ``read_namespaced_job_status`` returns.
    """
    async with self.get_conn() as connection:
        batch_api = async_client.BatchV1Api(connection)
        return await batch_api.read_namespaced_job_status(name=name, namespace=namespace)
|
|
974
|
+
|
|
975
|
+
async def wait_until_job_complete(self, name: str, namespace: str, poll_interval: float = 10) -> V1Job:
    """
    Poll a Job until ``is_job_complete`` reports it as finished.

    There is no timeout: the loop only ends once the Job is reported complete.

    :param name: Name of the Job to poll.
    :param namespace: Namespace that contains the Job.
    :param poll_interval: Seconds to sleep between two status requests.
    :return: The Job object from the poll that reported completion.
    """
    while True:
        self.log.info("Requesting status for the job '%s' ", name)
        current: V1Job = await self.get_job_status(name=name, namespace=namespace)
        if not self.is_job_complete(job=current):
            self.log.info("The job '%s' is incomplete. Sleeping for %i sec.", name, poll_interval)
            await asyncio.sleep(poll_interval)
            continue
        return current
|
|
991
|
+
|
|
992
|
+
async def wait_until_container_complete(
    self, name: str, namespace: str, container_name: str, poll_interval: float = 10
) -> None:
    """
    Poll a pod until ``container_is_completed`` accepts the given container.

    There is no timeout: the loop only ends once the check succeeds.

    :param name: Name of the pod to fetch.
    :param namespace: Namespace of the pod.
    :param container_name: Name of the container within the pod to monitor.
    :param poll_interval: Seconds to sleep between two polls.
    """
    while True:
        current_pod = await self.get_pod(name=name, namespace=namespace)
        if container_is_completed(pod=current_pod, container_name=container_name):
            return
        self.log.info("Waiting for container '%s' state to be completed", container_name)
        await asyncio.sleep(poll_interval)
|
|
1009
|
+
|
|
1010
|
+
async def wait_until_container_started(
    self, name: str, namespace: str, container_name: str, poll_interval: float = 10
) -> None:
    """
    Poll a pod until ``container_is_running`` accepts the given container.

    There is no timeout: the loop only ends once the check succeeds.

    :param name: Name of the pod to fetch.
    :param namespace: Namespace of the pod.
    :param container_name: Name of the container within the pod to monitor.
    :param poll_interval: Seconds to sleep between two polls.
    """
    while not container_is_running(
        pod=await self.get_pod(name=name, namespace=namespace), container_name=container_name
    ):
        self.log.info("Waiting for container '%s' state to be running", container_name)
        await asyncio.sleep(poll_interval)
|