dagster-k8s 0.26.2__tar.gz → 0.28.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/LICENSE +1 -1
  2. {dagster-k8s-0.26.2/dagster_k8s.egg-info → dagster_k8s-0.28.12}/PKG-INFO +17 -4
  3. dagster_k8s-0.28.12/README.md +4 -0
  4. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/__init__.py +1 -1
  5. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/client.py +41 -5
  6. dagster_k8s-0.28.12/dagster_k8s/component.py +65 -0
  7. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/container_context.py +2 -2
  8. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/executor.py +64 -6
  9. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/job.py +26 -0
  10. dagster_k8s-0.28.12/dagster_k8s/kubernetes_version.py +1 -0
  11. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/launcher.py +7 -3
  12. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/ops/k8s_job_op.py +8 -0
  13. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/pipes.py +99 -38
  14. dagster_k8s-0.28.12/dagster_k8s/utils.py +62 -0
  15. dagster_k8s-0.28.12/dagster_k8s/version.py +1 -0
  16. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12/dagster_k8s.egg-info}/PKG-INFO +17 -4
  17. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s.egg-info/SOURCES.txt +1 -0
  18. dagster_k8s-0.28.12/dagster_k8s.egg-info/requires.txt +3 -0
  19. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/setup.py +4 -3
  20. dagster-k8s-0.26.2/README.md +0 -4
  21. dagster-k8s-0.26.2/dagster_k8s/kubernetes_version.py +0 -1
  22. dagster-k8s-0.26.2/dagster_k8s/utils.py +0 -20
  23. dagster-k8s-0.26.2/dagster_k8s/version.py +0 -1
  24. dagster-k8s-0.26.2/dagster_k8s.egg-info/requires.txt +0 -3
  25. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/MANIFEST.in +0 -0
  26. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/models.py +0 -0
  27. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/ops/__init__.py +0 -0
  28. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/py.typed +0 -0
  29. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s/test.py +0 -0
  30. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s.egg-info/dependency_links.txt +0 -0
  31. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s.egg-info/not-zip-safe +0 -0
  32. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/dagster_k8s.egg-info/top_level.txt +0 -0
  33. {dagster-k8s-0.26.2 → dagster_k8s-0.28.12}/setup.cfg +0 -0
@@ -186,7 +186,7 @@
186
186
  same "printed page" as the copyright notice for easier
187
187
  identification within third-party archives.
188
188
 
189
- Copyright 2023 Dagster Labs, Inc".
189
+ Copyright 2025 Dagster Labs, Inc.
190
190
 
191
191
  Licensed under the Apache License, Version 2.0 (the "License");
192
192
  you may not use this file except in compliance with the License.
@@ -1,16 +1,29 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: dagster-k8s
3
- Version: 0.26.2
3
+ Version: 0.28.12
4
4
  Summary: A Dagster integration for k8s
5
5
  Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-k8s
6
6
  Author: Dagster Labs
7
7
  Author-email: hello@dagsterlabs.com
8
8
  License: Apache-2.0
9
- Classifier: Programming Language :: Python :: 3.9
10
9
  Classifier: Programming Language :: Python :: 3.10
11
10
  Classifier: Programming Language :: Python :: 3.11
12
11
  Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Programming Language :: Python :: 3.14
13
14
  Classifier: License :: OSI Approved :: Apache Software License
14
15
  Classifier: Operating System :: OS Independent
15
- Requires-Python: >=3.9,<3.13
16
+ Requires-Python: >=3.10,<3.15
16
17
  License-File: LICENSE
18
+ Requires-Dist: dagster==1.12.12
19
+ Requires-Dist: kubernetes<36
20
+ Requires-Dist: google-auth!=2.23.1
21
+ Dynamic: author
22
+ Dynamic: author-email
23
+ Dynamic: classifier
24
+ Dynamic: home-page
25
+ Dynamic: license
26
+ Dynamic: license-file
27
+ Dynamic: requires-dist
28
+ Dynamic: requires-python
29
+ Dynamic: summary
@@ -0,0 +1,4 @@
1
+ # dagster-k8s
2
+
3
+ The docs for `dagster-k8s` can be found
4
+ [here](https://docs.dagster.io/integrations/libraries/k8s/dagster-k8s).
@@ -1,4 +1,4 @@
1
- from dagster._core.libraries import DagsterLibraryRegistry
1
+ from dagster_shared.libraries import DagsterLibraryRegistry
2
2
 
3
3
  from dagster_k8s.executor import k8s_job_executor as k8s_job_executor
4
4
  from dagster_k8s.job import (
@@ -1,12 +1,15 @@
1
1
  import logging
2
+ import os
2
3
  import sys
3
4
  import time
5
+ from collections.abc import Callable
4
6
  from enum import Enum
5
- from typing import Any, Callable, Optional, TypeVar
7
+ from typing import Any, Optional, TypeVar
6
8
 
7
9
  import kubernetes.client
8
10
  import kubernetes.client.rest
9
11
  import six
12
+ import urllib3.exceptions
10
13
  from dagster import (
11
14
  DagsterInstance,
12
15
  _check as check,
@@ -22,12 +25,14 @@ try:
22
25
  except ImportError:
23
26
  K8S_EVENTS_API_PRESENT = False
24
27
 
28
+ logger = logging.getLogger(__name__)
25
29
 
26
30
  T = TypeVar("T")
27
31
 
28
32
  DEFAULT_WAIT_TIMEOUT = 86400.0 # 1 day
29
33
  DEFAULT_WAIT_BETWEEN_ATTEMPTS = 10.0 # 10 seconds
30
34
  DEFAULT_JOB_POD_COUNT = 1 # expect job:pod to be 1:1 by default
35
+ DEFAULT_JOB_CREATION_TIMEOUT = 10.0 # 10 seconds
31
36
 
32
37
 
33
38
  class WaitForPodState(Enum):
@@ -222,6 +227,20 @@ def k8s_api_retry_creation_mutation(
222
227
  k8s_api_exception=e,
223
228
  original_exc_info=sys.exc_info(),
224
229
  ) from e
230
+ except urllib3.exceptions.HTTPError as e:
231
+ # Temporary for recovery detection
232
+ logger.error(
233
+ f"k8s_api_retry_creation_mutation: {e.__module__}.{e.__class__.__name__}: {e!s}"
234
+ )
235
+ if remaining_attempts > 0:
236
+ time.sleep(timeout)
237
+ else:
238
+ raise DagsterK8sAPIRetryLimitExceeded(
239
+ msg_fn(),
240
+ k8s_api_exception=e,
241
+ max_retries=max_retries,
242
+ original_exc_info=sys.exc_info(),
243
+ ) from e
225
244
  check.failed("Unreachable.")
226
245
 
227
246
 
@@ -531,6 +550,18 @@ class DagsterKubernetesClient:
531
550
 
532
551
  ### Pod operations ###
533
552
 
553
+ def get_pod_by_name(self, pod_name: str, namespace: str):
554
+ """Get a pod by name.
555
+
556
+ Args:
557
+ pod_name (str): Name of the pod to get.
558
+ namespace (str): Namespace in which the pod is located.
559
+ """
560
+ check.str_param(pod_name, "pod_name")
561
+ check.str_param(namespace, "namespace")
562
+
563
+ return self.core_api.read_namespaced_pod(pod_name, namespace=namespace)
564
+
534
565
  def get_pods_in_job(self, job_name, namespace):
535
566
  """Get the pods launched by the job ``job_name``.
536
567
 
@@ -740,13 +771,16 @@ class DagsterKubernetesClient:
740
771
  elif state.terminated is not None:
741
772
  container_name = container_status.name
742
773
  if state.terminated.exit_code != 0:
774
+ tail_lines = int(
775
+ os.getenv("DAGSTER_K8S_WAIT_FOR_POD_FAILURE_LOG_LINE_COUNT", "100")
776
+ )
743
777
  raw_logs = self.retrieve_pod_logs(
744
- pod_name, namespace, container_name=container_name
778
+ pod_name, namespace, container_name=container_name, tail_lines=tail_lines
745
779
  )
746
780
  message = state.terminated.message
747
781
  msg = (
748
- f'Container "{container_name}" failed with message: "{message}" '
749
- f'and pod logs: "{raw_logs}"'
782
+ f'Container "{container_name}" failed with message: "{message}". '
783
+ f'Last {tail_lines} log lines: "{raw_logs}"'
750
784
  )
751
785
 
752
786
  self.logger(msg)
@@ -1011,7 +1045,9 @@ class DagsterKubernetesClient:
1011
1045
  wait_time_between_attempts: float = DEFAULT_WAIT_BETWEEN_ATTEMPTS,
1012
1046
  ) -> None:
1013
1047
  k8s_api_retry_creation_mutation(
1014
- lambda: self.batch_api.create_namespaced_job(body=body, namespace=namespace),
1048
+ lambda: self.batch_api.create_namespaced_job(
1049
+ body=body, namespace=namespace, _request_timeout=DEFAULT_JOB_CREATION_TIMEOUT
1050
+ ),
1015
1051
  max_retries=3,
1016
1052
  timeout=wait_time_between_attempts,
1017
1053
  )
@@ -0,0 +1,65 @@
1
+ from collections.abc import Mapping, Sequence
2
+ from dataclasses import dataclass
3
+ from functools import cached_property
4
+ from typing import Any, Optional, Union
5
+
6
+ from dagster import (
7
+ AssetExecutionContext,
8
+ AssetsDefinition,
9
+ Component,
10
+ ComponentLoadContext,
11
+ Definitions,
12
+ Resolvable,
13
+ ResolvedAssetSpec,
14
+ multi_asset,
15
+ )
16
+
17
+ from dagster_k8s.pipes import PipesK8sClient, build_pod_body
18
+
19
+
20
+ @dataclass
21
+ class PipesK8sComponent(Component, Resolvable):
22
+ """Component that creates assets backed by kubernetes pod execution via Dagster Pipes."""
23
+
24
+ name: str
25
+ assets: Sequence[ResolvedAssetSpec]
26
+ image: Optional[str] = None
27
+ command: Optional[Union[str, Sequence[str]]] = None
28
+ namespace: Optional[str] = None
29
+ env: Optional[Mapping[str, str]] = None
30
+ base_pod_meta: Optional[Mapping[str, Any]] = None
31
+ base_pod_spec: Optional[Mapping[str, Any]] = None
32
+
33
+ def __post_init__(self):
34
+ # validate that we can build a pod for the given args
35
+ # i.e. image or base_pod_self.image
36
+ build_pod_body(
37
+ pod_name=self.name,
38
+ image=self.image,
39
+ command=self.command,
40
+ env_vars=self.env or {},
41
+ base_pod_meta=self.base_pod_meta,
42
+ base_pod_spec=self.base_pod_spec,
43
+ )
44
+
45
+ @cached_property
46
+ def client(self):
47
+ return PipesK8sClient()
48
+
49
+ def build_defs(self, context: ComponentLoadContext):
50
+ return Definitions(assets=[self.build_asset()])
51
+
52
+ def build_asset(self) -> AssetsDefinition:
53
+ @multi_asset(name=self.name, specs=self.assets)
54
+ def _asset(context: AssetExecutionContext):
55
+ return self.client.run(
56
+ context=context,
57
+ image=self.image,
58
+ command=self.command,
59
+ namespace=self.namespace,
60
+ env=self.env,
61
+ base_pod_meta=self.base_pod_meta,
62
+ base_pod_spec=self.base_pod_spec,
63
+ ).get_results()
64
+
65
+ return _asset
@@ -11,7 +11,7 @@ from dagster._core.container_context import process_shared_container_context_con
11
11
  from dagster._core.errors import DagsterInvalidConfigError
12
12
  from dagster._core.storage.dagster_run import DagsterRun
13
13
  from dagster._core.utils import parse_env_var
14
- from dagster._utils import hash_collection
14
+ from dagster_shared.utils.hash import hash_collection
15
15
 
16
16
  if TYPE_CHECKING:
17
17
  from dagster_k8s import K8sRunLauncher
@@ -528,7 +528,7 @@ class K8sContainerContext(
528
528
  run_k8s_container_context,
529
529
  )
530
530
 
531
- processed_context_value = cast(dict, processed_container_context.value)
531
+ processed_context_value = cast("dict", processed_container_context.value)
532
532
 
533
533
  return shared_container_context.merge(
534
534
  K8sContainerContext(
@@ -1,3 +1,4 @@
1
+ import os
1
2
  from collections.abc import Iterator
2
3
  from typing import Optional, cast
3
4
 
@@ -15,6 +16,10 @@ from dagster._core.definitions.executor_definition import multiple_process_execu
15
16
  from dagster._core.definitions.metadata import MetadataValue
16
17
  from dagster._core.events import DagsterEvent, EngineEventData
17
18
  from dagster._core.execution.retries import RetryMode, get_retries_config
19
+ from dagster._core.execution.step_dependency_config import (
20
+ StepDependencyConfig,
21
+ get_step_dependency_config_field,
22
+ )
18
23
  from dagster._core.execution.tags import get_tag_concurrency_limits_config
19
24
  from dagster._core.executor.base import Executor
20
25
  from dagster._core.executor.init import InitExecutorContext
@@ -24,6 +29,7 @@ from dagster._core.executor.step_delegating import (
24
29
  StepHandler,
25
30
  StepHandlerContext,
26
31
  )
32
+ from dagster._utils.cached_method import cached_method
27
33
  from dagster._utils.merger import merge_dicts
28
34
 
29
35
  from dagster_k8s.client import DagsterKubernetesClient
@@ -31,12 +37,14 @@ from dagster_k8s.container_context import K8sContainerContext
31
37
  from dagster_k8s.job import (
32
38
  USER_DEFINED_K8S_JOB_CONFIG_SCHEMA,
33
39
  DagsterK8sJobConfig,
40
+ OwnerReference,
34
41
  UserDefinedDagsterK8sConfig,
35
42
  construct_dagster_k8s_job,
36
43
  get_k8s_job_name,
37
44
  get_user_defined_k8s_config,
38
45
  )
39
46
  from dagster_k8s.launcher import K8sRunLauncher
47
+ from dagster_k8s.utils import get_deployment_id_label
40
48
 
41
49
  _K8S_EXECUTOR_CONFIG_SCHEMA = merge_dicts(
42
50
  DagsterK8sJobConfig.config_type_job(),
@@ -81,6 +89,15 @@ _K8S_EXECUTOR_CONFIG_SCHEMA = merge_dicts(
81
89
  default_value={},
82
90
  description="Per op k8s configuration overrides.",
83
91
  ),
92
+ "enable_owner_references": Field(
93
+ bool,
94
+ is_required=False,
95
+ default_value=False,
96
+ description="Whether to insert Kubernetes owner references on step jobs to their parent run pod."
97
+ " This ensures that step jobs and step pods are garbage collected when the run pod is deleted."
98
+ " For more information, see https://kubernetes.io/docs/concepts/overview/working-with-objects/owners-dependents/",
99
+ ),
100
+ "step_dependency_config": get_step_dependency_config_field(),
84
101
  },
85
102
  )
86
103
 
@@ -154,12 +171,12 @@ def k8s_job_executor(init_context: InitExecutorContext) -> Executor:
154
171
  )
155
172
 
156
173
  if "load_incluster_config" in exc_cfg:
157
- load_incluster_config = cast(bool, exc_cfg["load_incluster_config"])
174
+ load_incluster_config = cast("bool", exc_cfg["load_incluster_config"])
158
175
  else:
159
176
  load_incluster_config = run_launcher.load_incluster_config if run_launcher else True
160
177
 
161
178
  if "kubeconfig_file" in exc_cfg:
162
- kubeconfig_file = cast(Optional[str], exc_cfg["kubeconfig_file"])
179
+ kubeconfig_file = cast("Optional[str]", exc_cfg["kubeconfig_file"])
163
180
  else:
164
181
  kubeconfig_file = run_launcher.kubeconfig_file if run_launcher else None
165
182
 
@@ -170,11 +187,17 @@ def k8s_job_executor(init_context: InitExecutorContext) -> Executor:
170
187
  load_incluster_config=load_incluster_config,
171
188
  kubeconfig_file=kubeconfig_file,
172
189
  per_step_k8s_config=exc_cfg.get("per_step_k8s_config", {}),
190
+ enable_owner_references=check.opt_bool_param(
191
+ exc_cfg.get("enable_owner_references"), "enable_owner_references", False
192
+ ),
173
193
  ),
174
194
  retries=RetryMode.from_config(exc_cfg["retries"]), # type: ignore
175
195
  max_concurrent=check.opt_int_elem(exc_cfg, "max_concurrent"),
176
196
  tag_concurrency_limits=check.opt_list_elem(exc_cfg, "tag_concurrency_limits"),
177
197
  should_verify_step=True,
198
+ step_dependency_config=StepDependencyConfig.from_config(
199
+ exc_cfg.get("step_dependency_config") # type: ignore
200
+ ),
178
201
  )
179
202
 
180
203
 
@@ -190,7 +213,9 @@ class K8sStepHandler(StepHandler):
190
213
  load_incluster_config: bool,
191
214
  kubeconfig_file: Optional[str],
192
215
  k8s_client_batch_api=None,
216
+ k8s_client_core_api=None,
193
217
  per_step_k8s_config=None,
218
+ enable_owner_references=False,
194
219
  ):
195
220
  super().__init__()
196
221
 
@@ -198,7 +223,7 @@ class K8sStepHandler(StepHandler):
198
223
  self._executor_container_context = check.inst_param(
199
224
  container_context, "container_context", K8sContainerContext
200
225
  )
201
-
226
+ self._kubeconfig_file = None
202
227
  if load_incluster_config:
203
228
  check.invariant(
204
229
  kubeconfig_file is None,
@@ -208,17 +233,20 @@ class K8sStepHandler(StepHandler):
208
233
  else:
209
234
  check.opt_str_param(kubeconfig_file, "kubeconfig_file")
210
235
  kubernetes.config.load_kube_config(kubeconfig_file)
236
+ self._kubeconfig_file = kubeconfig_file
211
237
 
212
238
  self._api_client = DagsterKubernetesClient.production_client(
213
- batch_api_override=k8s_client_batch_api
239
+ batch_api_override=k8s_client_batch_api,
240
+ core_api_override=k8s_client_core_api,
214
241
  )
215
242
  self._per_step_k8s_config = check.opt_dict_param(
216
243
  per_step_k8s_config, "per_step_k8s_config", key_type=str, value_type=dict
217
244
  )
245
+ self._enable_owner_references = enable_owner_references
218
246
 
219
247
  def _get_step_key(self, step_handler_context: StepHandlerContext) -> str:
220
248
  step_keys_to_execute = cast(
221
- list[str], step_handler_context.execute_step_args.step_keys_to_execute
249
+ "list[str]", step_handler_context.execute_step_args.step_keys_to_execute
222
250
  )
223
251
  assert len(step_keys_to_execute) == 1, "Launching multiple steps is not currently supported"
224
252
  return step_keys_to_execute[0]
@@ -230,7 +258,7 @@ class K8sStepHandler(StepHandler):
230
258
 
231
259
  context = K8sContainerContext.create_for_run(
232
260
  step_handler_context.dagster_run,
233
- cast(K8sRunLauncher, step_handler_context.instance.run_launcher),
261
+ cast("K8sRunLauncher", step_handler_context.instance.run_launcher),
234
262
  include_run_tags=False, # For now don't include job-level dagster-k8s/config tags in step pods
235
263
  )
236
264
  context = context.merge(self._executor_container_context)
@@ -263,6 +291,25 @@ class K8sStepHandler(StepHandler):
263
291
 
264
292
  return f"dagster-step-{name_key}"
265
293
 
294
+ @cached_method
295
+ def _detect_current_name_and_uid(
296
+ self,
297
+ ) -> Optional[tuple[str, str]]:
298
+ """Get the current pod's pod name and uid, if available."""
299
+ from dagster_k8s.utils import detect_current_namespace
300
+
301
+ hostname = os.getenv("HOSTNAME")
302
+ if not hostname:
303
+ return None
304
+
305
+ namespace = detect_current_namespace(self._kubeconfig_file)
306
+ if not namespace:
307
+ return None
308
+
309
+ pod = self._api_client.get_pod_by_name(pod_name=hostname, namespace=namespace)
310
+
311
+ return pod.metadata.name, pod.metadata.uid
312
+
266
313
  def launch_step(self, step_handler_context: StepHandlerContext) -> Iterator[DagsterEvent]:
267
314
  step_key = self._get_step_key(step_handler_context)
268
315
 
@@ -297,6 +344,16 @@ class K8sStepHandler(StepHandler):
297
344
  labels["dagster/code-location"] = (
298
345
  run.remote_job_origin.repository_origin.code_location_origin.location_name
299
346
  )
347
+ deployment_name_env_var = get_deployment_id_label(container_context.run_k8s_config)
348
+ if deployment_name_env_var:
349
+ labels["dagster/deployment-name"] = deployment_name_env_var
350
+
351
+ owner_references = []
352
+ if self._enable_owner_references:
353
+ my_pod = self._detect_current_name_and_uid()
354
+ if my_pod:
355
+ owner_references = [OwnerReference(kind="Pod", name=my_pod[0], uid=my_pod[1])]
356
+
300
357
  job = construct_dagster_k8s_job(
301
358
  job_config=job_config,
302
359
  args=args,
@@ -313,6 +370,7 @@ class K8sStepHandler(StepHandler):
313
370
  },
314
371
  {"name": "DAGSTER_RUN_STEP_KEY", "value": step_key},
315
372
  ],
373
+ owner_references=owner_references,
316
374
  )
317
375
 
318
376
  yield DagsterEvent.step_worker_starting(
@@ -85,6 +85,20 @@ DEFAULT_JOB_SPEC_CONFIG = {
85
85
  }
86
86
 
87
87
 
88
+ class OwnerReference(NamedTuple):
89
+ kind: str
90
+ name: str
91
+ uid: str
92
+
93
+ def to_dict(self) -> dict[str, Any]:
94
+ return {
95
+ "api_version": "batch/v1" if self.kind == "Job" else "v1",
96
+ "kind": self.kind,
97
+ "name": self.name,
98
+ "uid": self.uid,
99
+ }
100
+
101
+
88
102
  class UserDefinedDagsterK8sConfig(
89
103
  NamedTuple(
90
104
  "_UserDefinedDagsterK8sConfig",
@@ -754,6 +768,7 @@ def construct_dagster_k8s_job(
754
768
  component: Optional[str] = None,
755
769
  labels: Optional[Mapping[str, str]] = None,
756
770
  env_vars: Optional[Sequence[Mapping[str, Any]]] = None,
771
+ owner_references: Optional[Sequence[OwnerReference]] = None,
757
772
  ) -> kubernetes.client.V1Job:
758
773
  """Constructs a Kubernetes Job object.
759
774
 
@@ -929,6 +944,16 @@ def construct_dagster_k8s_job(
929
944
  user_defined_job_metadata = copy.deepcopy(dict(user_defined_k8s_config.job_metadata))
930
945
  user_defined_job_labels = user_defined_job_metadata.pop("labels", {})
931
946
 
947
+ owner_reference_dicts = (
948
+ [owner_reference.to_dict() for owner_reference in owner_references]
949
+ if owner_references
950
+ else []
951
+ )
952
+ if "owner_references" in user_defined_job_metadata:
953
+ user_defined_job_metadata["owner_references"] = (
954
+ owner_reference_dicts + user_defined_job_metadata["owner_references"]
955
+ )
956
+
932
957
  job = k8s_model_from_dict(
933
958
  kubernetes.client.V1Job,
934
959
  merge_dicts(
@@ -944,6 +969,7 @@ def construct_dagster_k8s_job(
944
969
  dagster_labels, user_defined_job_labels, job_config.labels
945
970
  ),
946
971
  },
972
+ {"owner_references": owner_reference_dicts} if owner_reference_dicts else {},
947
973
  ),
948
974
  "spec": job_spec_config,
949
975
  },
@@ -0,0 +1 @@
1
+ KUBERNETES_VERSION_UPPER_BOUND = "36"
@@ -18,6 +18,7 @@ from dagster._utils.error import serializable_error_info_from_exc_info
18
18
  from dagster_k8s.client import DagsterKubernetesClient
19
19
  from dagster_k8s.container_context import K8sContainerContext
20
20
  from dagster_k8s.job import DagsterK8sJobConfig, construct_dagster_k8s_job, get_job_name_from_run_id
21
+ from dagster_k8s.utils import get_deployment_id_label
21
22
 
22
23
 
23
24
  class K8sRunLauncher(RunLauncher, ConfigurableClass):
@@ -231,6 +232,9 @@ class K8sRunLauncher(RunLauncher, ConfigurableClass):
231
232
  "dagster/job": job_origin.job_name,
232
233
  "dagster/run-id": run.run_id,
233
234
  }
235
+ deployment_name_env_var = get_deployment_id_label(user_defined_k8s_config)
236
+ if deployment_name_env_var:
237
+ labels["dagster/deployment-name"] = deployment_name_env_var
234
238
  if run.remote_job_origin:
235
239
  labels["dagster/code-location"] = (
236
240
  run.remote_job_origin.repository_origin.code_location_origin.location_name
@@ -319,7 +323,7 @@ class K8sRunLauncher(RunLauncher, ConfigurableClass):
319
323
  return None
320
324
  return self._instance.count_resume_run_attempts(run.run_id)
321
325
 
322
- def terminate(self, run_id):
326
+ def terminate(self, run_id): # pyright: ignore[reportIncompatibleMethodOverride]
323
327
  check.str_param(run_id, "run_id")
324
328
  run = self._instance.get_run_by_id(run_id)
325
329
 
@@ -445,8 +449,8 @@ class K8sRunLauncher(RunLauncher, ConfigurableClass):
445
449
  WorkerStatus.FAILED, "Run has not completed but K8s job has no active pods"
446
450
  )
447
451
 
448
- if status.failed:
449
- return CheckRunHealthResult(WorkerStatus.FAILED, "K8s job failed")
450
452
  if status.succeeded:
451
453
  return CheckRunHealthResult(WorkerStatus.SUCCESS)
454
+ if status.failed and not status.active:
455
+ return CheckRunHealthResult(WorkerStatus.FAILED, "K8s job failed")
452
456
  return CheckRunHealthResult(WorkerStatus.RUNNING)
@@ -313,6 +313,14 @@ def execute_k8s_job(
313
313
  labels["dagster/code-location"] = (
314
314
  context.dagster_run.remote_job_origin.repository_origin.code_location_origin.location_name
315
315
  )
316
+ env = user_defined_k8s_config.container_config.get("env")
317
+ deployment_name_env_var = (
318
+ next((entry for entry in env if entry["name"] == "DAGSTER_CLOUD_DEPLOYMENT_NAME"), None)
319
+ if env
320
+ else None
321
+ )
322
+ if deployment_name_env_var:
323
+ labels["dagster/deployment-name"] = deployment_name_env_var["value"]
316
324
 
317
325
  job = construct_dagster_k8s_job(
318
326
  job_config=k8s_job_config,
@@ -8,7 +8,6 @@ import time
8
8
  from collections.abc import Callable, Generator, Iterator, Mapping, Sequence
9
9
  from contextlib import contextmanager
10
10
  from datetime import datetime
11
- from pathlib import Path
12
11
  from typing import Any, Optional, Union
13
12
 
14
13
  import kubernetes
@@ -34,6 +33,7 @@ from dagster._core.pipes.utils import (
34
33
  extract_message_or_forward_to_stdout,
35
34
  open_pipes_session,
36
35
  )
36
+ from dagster._time import parse_time_string
37
37
  from dagster_pipes import (
38
38
  DAGSTER_PIPES_CONTEXT_ENV_VAR,
39
39
  DAGSTER_PIPES_MESSAGES_ENV_VAR,
@@ -41,14 +41,16 @@ from dagster_pipes import (
41
41
  PipesExtras,
42
42
  encode_env_var,
43
43
  )
44
+ from urllib3.exceptions import ReadTimeoutError
44
45
 
45
46
  from dagster_k8s.client import (
46
47
  DEFAULT_WAIT_BETWEEN_ATTEMPTS,
48
+ DEFAULT_WAIT_TIMEOUT,
47
49
  DagsterKubernetesClient,
48
50
  WaitForPodState,
49
51
  )
50
52
  from dagster_k8s.models import k8s_model_from_dict, k8s_snake_case_dict
51
- from dagster_k8s.utils import get_common_labels
53
+ from dagster_k8s.utils import detect_current_namespace, get_common_labels
52
54
 
53
55
  INIT_WAIT_TIMEOUT_FOR_READY = 1800.0 # 30mins
54
56
  INIT_WAIT_TIMEOUT_FOR_TERMINATE = 10.0 # 10s
@@ -62,9 +64,13 @@ def get_pod_name(run_id: str, op_name: str):
62
64
 
63
65
 
64
66
  DEFAULT_CONTAINER_NAME = "dagster-pipes-execution"
65
- _NAMESPACE_SECRET_PATH = Path("/var/run/secrets/kubernetes.io/serviceaccount/namespace")
66
67
  _DEV_NULL_MESSAGE_WRITER = encode_env_var({"path": "/dev/null"})
67
68
 
69
+ DEFAULT_CONSUME_POD_LOGS_RETRIES = 5
70
+
71
+ # By default, timeout and reconnect to the log stream every hour.
72
+ DEFAULT_DAGSTER_PIPES_K8S_CONSUME_POD_LOGS_REQUEST_TIMEOUT = 3600
73
+
68
74
 
69
75
  class PipesK8sPodLogsMessageReader(PipesMessageReader):
70
76
  """Message reader that reads messages from kubernetes pod logs."""
@@ -80,24 +86,97 @@ class PipesK8sPodLogsMessageReader(PipesMessageReader):
80
86
  finally:
81
87
  self._handler = None
82
88
 
89
+ def _get_consume_logs_request_timeout(self) -> Optional[int]:
90
+ request_timeout_env_var = os.getenv("DAGSTER_PIPES_K8S_CONSUME_POD_LOGS_REQUEST_TIMEOUT")
91
+ if request_timeout_env_var:
92
+ return int(request_timeout_env_var)
93
+
94
+ return DEFAULT_DAGSTER_PIPES_K8S_CONSUME_POD_LOGS_REQUEST_TIMEOUT
95
+
83
96
  def consume_pod_logs(
84
97
  self,
98
+ context: Union[OpExecutionContext, AssetExecutionContext],
85
99
  core_api: kubernetes.client.CoreV1Api,
86
100
  pod_name: str,
87
101
  namespace: str,
88
102
  ):
103
+ last_seen_timestamp = None
104
+ catching_up_after_retry = False
105
+
106
+ request_timeout = self._get_consume_logs_request_timeout()
107
+
108
+ retries_remaining = int(
109
+ os.getenv(
110
+ "DAGSTER_PIPES_K8S_CONSUME_POD_LOGS_RETRIES", str(DEFAULT_CONSUME_POD_LOGS_RETRIES)
111
+ )
112
+ )
113
+
89
114
  handler = check.not_none(
90
115
  self._handler, "can only consume logs within scope of context manager"
91
116
  )
92
- for line in core_api.read_namespaced_pod_log(
93
- pod_name,
94
- namespace,
95
- follow=True,
96
- _preload_content=False, # avoid JSON processing
97
- ).stream():
98
- log_chunk = line.decode("utf-8")
99
- for log_line in log_chunk.split("\n"):
100
- extract_message_or_forward_to_stdout(handler, log_line)
117
+
118
+ while True:
119
+ # On retry, re-connect to the log stream for new messages since the last seen timestamp
120
+ # (with a buffer to ensure none are missed). The messages are deduplicated by timestamp below.
121
+ if last_seen_timestamp:
122
+ since_seconds = int(
123
+ max(time.time() - parse_time_string(last_seen_timestamp).timestamp(), 0)
124
+ + int(os.getenv("DAGSTER_PIPES_K8S_CONSUME_POD_LOGS_BUFFER_SECONDS", "300"))
125
+ )
126
+ else:
127
+ since_seconds = None
128
+
129
+ try:
130
+ for log_item in _process_log_stream(
131
+ core_api.read_namespaced_pod_log(
132
+ pod_name,
133
+ namespace,
134
+ follow=True,
135
+ timestamps=True,
136
+ since_seconds=since_seconds,
137
+ _preload_content=False, # avoid JSON processing
138
+ _request_timeout=request_timeout,
139
+ )
140
+ ):
141
+ timestamp = log_item.timestamp
142
+ message = log_item.log
143
+
144
+ if (
145
+ catching_up_after_retry
146
+ and timestamp
147
+ and last_seen_timestamp
148
+ and timestamp <= last_seen_timestamp
149
+ ):
150
+ # This is a log that we've already seen before from before we retried
151
+ continue
152
+ else:
153
+ catching_up_after_retry = False
154
+ extract_message_or_forward_to_stdout(handler, message)
155
+ if timestamp:
156
+ last_seen_timestamp = (
157
+ max(last_seen_timestamp, timestamp)
158
+ if last_seen_timestamp
159
+ else timestamp
160
+ )
161
+ return
162
+ except Exception as e:
163
+ # Expected read timeouts can occur for long-running pods if a request timeout is set
164
+ expected_read_timeout = isinstance(e, ReadTimeoutError) and request_timeout
165
+
166
+ if expected_read_timeout:
167
+ # Expected so doesn't need to be logged to event log, but write to stdout
168
+ # for visibility
169
+ logging.getLogger("dagster").info("Re-connecting to pod logs stream")
170
+ else:
171
+ if retries_remaining == 0:
172
+ raise
173
+ retries_remaining -= 1
174
+ context.log.warning(
175
+ f"Error consuming pod logs. {retries_remaining} retr{('y' if retries_remaining == 1 else 'ies')} remaining",
176
+ exc_info=True,
177
+ )
178
+
179
+ catching_up_after_retry = True
101
180
 
102
181
  @contextmanager
103
182
  def async_consume_pod_logs(
@@ -359,7 +438,7 @@ class PipesK8sClient(PipesClient, TreatAsResourceParam):
359
438
  )
360
439
 
361
440
  @public
362
- def run(
441
+ def run( # pyright: ignore[reportIncompatibleMethodOverride]
363
442
  self,
364
443
  *,
365
444
  context: Union[OpExecutionContext, AssetExecutionContext],
@@ -372,6 +451,7 @@ class PipesK8sClient(PipesClient, TreatAsResourceParam):
372
451
  base_pod_spec: Optional[Mapping[str, Any]] = None,
373
452
  ignore_containers: Optional[set] = None,
374
453
  enable_multi_container_logs: bool = False,
454
+ pod_wait_timeout: float = DEFAULT_WAIT_TIMEOUT,
375
455
  ) -> PipesClientCompletedInvocation:
376
456
  """Publish a kubernetes pod and wait for it to complete, enriched with the pipes protocol.
377
457
 
@@ -407,6 +487,8 @@ class PipesK8sClient(PipesClient, TreatAsResourceParam):
407
487
  ignore_containers (Optional[Set]): Ignore certain containers from waiting for termination. Defaults to
408
488
  None.
409
489
  enable_multi_container_logs (bool): Whether or not to enable multi-container log consumption.
490
+ pod_wait_timeout (float): How long to wait for the pod to terminate before raising an exception.
491
+ Defaults to 24h. Set to 0 to disable.
410
492
 
411
493
  Returns:
412
494
  PipesClientCompletedInvocation: Wrapper containing results reported by the external
@@ -422,7 +504,7 @@ class PipesK8sClient(PipesClient, TreatAsResourceParam):
422
504
  context_injector=self.context_injector,
423
505
  message_reader=self.message_reader,
424
506
  ) as pipes_session:
425
- namespace = namespace or _detect_current_namespace(self.kubeconfig_file) or "default"
507
+ namespace = namespace or detect_current_namespace(self.kubeconfig_file) or "default"
426
508
  pod_name = get_pod_name(context.run_id, context.op.name)
427
509
  pod_body = build_pod_body(
428
510
  pod_name=pod_name,
@@ -446,13 +528,14 @@ class PipesK8sClient(PipesClient, TreatAsResourceParam):
446
528
  pod_name=pod_name,
447
529
  enable_multi_container_logs=enable_multi_container_logs,
448
530
  ):
449
- # We need to wait for the pod to start up so that the log streaming is successful afterwards.
531
+ # wait until the pod is fully terminated (or raise an exception if it failed)
450
532
  client.wait_for_pod(
451
533
  pod_name,
452
534
  namespace,
453
535
  wait_for_state=WaitForPodState.Terminated,
454
536
  ignore_containers=ignore_containers,
455
537
  wait_time_between_attempts=self.poll_interval,
538
+ wait_timeout=pod_wait_timeout,
456
539
  )
457
540
  finally:
458
541
  client.core_api.delete_namespaced_pod(pod_name, namespace)
@@ -504,6 +587,7 @@ class PipesK8sClient(PipesClient, TreatAsResourceParam):
504
587
  return
505
588
  else:
506
589
  self.message_reader.consume_pod_logs(
590
+ context=context,
507
591
  core_api=client.core_api,
508
592
  namespace=namespace,
509
593
  pod_name=pod_name,
@@ -512,29 +596,6 @@ class PipesK8sClient(PipesClient, TreatAsResourceParam):
512
596
  yield
513
597
 
514
598
 
515
- def _detect_current_namespace(
516
- kubeconfig_file: Optional[str], namespace_secret_path: Path = _NAMESPACE_SECRET_PATH
517
- ) -> Optional[str]:
518
- """Get the current in-cluster namespace when operating within the cluster.
519
-
520
- First attempt to read it from the `serviceaccount` secret or get it from the kubeconfig_file if it is possible.
521
- It will attempt to take from the active context if it exists and returns None if it does not exist.
522
- """
523
- if namespace_secret_path.exists():
524
- with namespace_secret_path.open() as f:
525
- # We only need to read the first line, this guards us against bad input.
526
- return f.read().strip()
527
-
528
- if not kubeconfig_file:
529
- return None
530
-
531
- try:
532
- _, active_context = kubernetes.config.list_kube_config_contexts(kubeconfig_file)
533
- return active_context["context"]["namespace"]
534
- except KeyError:
535
- return None
536
-
537
-
538
599
  def build_pod_body(
539
600
  pod_name: str,
540
601
  image: Optional[str],
@@ -0,0 +1,62 @@
1
+ import re
2
+ from pathlib import Path
3
+ from typing import TYPE_CHECKING, Optional
4
+
5
+ import kubernetes
6
+ from dagster import __version__ as dagster_version
7
+
8
+ if TYPE_CHECKING:
9
+ from dagster_k8s.job import UserDefinedDagsterK8sConfig
10
+
11
+
12
+ def sanitize_k8s_label(label_name: str):
13
+ # Truncate too long label values to fit into 63-characters limit and avoid invalid characters.
14
+ # https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
15
+ label_name = label_name[:63]
16
+ return re.sub(r"[^a-zA-Z0-9\-_\.]", "-", label_name).strip("-").strip("_").strip(".")
17
+
18
+
19
+ def get_common_labels():
20
+ # See: https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/
21
+ return {
22
+ "app.kubernetes.io/name": "dagster",
23
+ "app.kubernetes.io/instance": "dagster",
24
+ "app.kubernetes.io/version": sanitize_k8s_label(dagster_version),
25
+ "app.kubernetes.io/part-of": "dagster",
26
+ }
27
+
28
+
29
+ def get_deployment_id_label(user_defined_k8s_config: "UserDefinedDagsterK8sConfig"):
30
+ env = user_defined_k8s_config.container_config.get("env")
31
+ deployment_name_env_var = (
32
+ next((entry for entry in env if entry["name"] == "DAGSTER_CLOUD_DEPLOYMENT_NAME"), None)
33
+ if env
34
+ else None
35
+ )
36
+ return deployment_name_env_var["value"] if deployment_name_env_var else None
37
+
38
+
39
+ _NAMESPACE_SECRET_PATH = Path("/var/run/secrets/kubernetes.io/serviceaccount/namespace")
40
+
41
+
42
+ def detect_current_namespace(
43
+ kubeconfig_file: Optional[str], namespace_secret_path: Path = _NAMESPACE_SECRET_PATH
44
+ ) -> Optional[str]:
45
+ """Get the current in-cluster namespace when operating within the cluster.
46
+
47
+ First attempt to read it from the `serviceaccount` secret or get it from the kubeconfig_file if it is possible.
48
+ It will attempt to take from the active context if it exists and returns None if it does not exist.
49
+ """
50
+ if namespace_secret_path.exists():
51
+ with namespace_secret_path.open() as f:
52
+ # We only need to read the first line, this guards us against bad input.
53
+ return f.read().strip()
54
+
55
+ if not kubeconfig_file:
56
+ return None
57
+
58
+ try:
59
+ _, active_context = kubernetes.config.list_kube_config_contexts(kubeconfig_file)
60
+ return active_context["context"]["namespace"]
61
+ except KeyError:
62
+ return None
@@ -0,0 +1 @@
1
+ __version__ = "0.28.12"
@@ -1,16 +1,29 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: dagster-k8s
3
- Version: 0.26.2
3
+ Version: 0.28.12
4
4
  Summary: A Dagster integration for k8s
5
5
  Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-k8s
6
6
  Author: Dagster Labs
7
7
  Author-email: hello@dagsterlabs.com
8
8
  License: Apache-2.0
9
- Classifier: Programming Language :: Python :: 3.9
10
9
  Classifier: Programming Language :: Python :: 3.10
11
10
  Classifier: Programming Language :: Python :: 3.11
12
11
  Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Programming Language :: Python :: 3.14
13
14
  Classifier: License :: OSI Approved :: Apache Software License
14
15
  Classifier: Operating System :: OS Independent
15
- Requires-Python: >=3.9,<3.13
16
+ Requires-Python: >=3.10,<3.15
16
17
  License-File: LICENSE
18
+ Requires-Dist: dagster==1.12.12
19
+ Requires-Dist: kubernetes<36
20
+ Requires-Dist: google-auth!=2.23.1
21
+ Dynamic: author
22
+ Dynamic: author-email
23
+ Dynamic: classifier
24
+ Dynamic: home-page
25
+ Dynamic: license
26
+ Dynamic: license-file
27
+ Dynamic: requires-dist
28
+ Dynamic: requires-python
29
+ Dynamic: summary
@@ -5,6 +5,7 @@ setup.cfg
5
5
  setup.py
6
6
  dagster_k8s/__init__.py
7
7
  dagster_k8s/client.py
8
+ dagster_k8s/component.py
8
9
  dagster_k8s/container_context.py
9
10
  dagster_k8s/executor.py
10
11
  dagster_k8s/job.py
@@ -0,0 +1,3 @@
1
+ dagster==1.12.12
2
+ kubernetes<36
3
+ google-auth!=2.23.1
@@ -31,18 +31,19 @@ setup(
31
31
  description="A Dagster integration for k8s",
32
32
  url="https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-k8s",
33
33
  classifiers=[
34
- "Programming Language :: Python :: 3.9",
35
34
  "Programming Language :: Python :: 3.10",
36
35
  "Programming Language :: Python :: 3.11",
37
36
  "Programming Language :: Python :: 3.12",
37
+ "Programming Language :: Python :: 3.13",
38
+ "Programming Language :: Python :: 3.14",
38
39
  "License :: OSI Approved :: Apache Software License",
39
40
  "Operating System :: OS Independent",
40
41
  ],
41
42
  packages=find_packages(exclude=["dagster_k8s_tests*"]),
42
43
  include_package_data=True,
43
- python_requires=">=3.9,<3.13",
44
+ python_requires=">=3.10,<3.15",
44
45
  install_requires=[
45
- "dagster==1.10.2",
46
+ "dagster==1.12.12",
46
47
  f"kubernetes<{KUBERNETES_VERSION_UPPER_BOUND}",
47
48
  # exclude a google-auth release that added an overly restrictive urllib3 pin that confuses dependency resolvers
48
49
  "google-auth!=2.23.1",
@@ -1,4 +0,0 @@
1
- # dagster-k8s
2
-
3
- The docs for `dagster-k8s` can be found
4
- [here](https://docs.dagster.io/api/python-api/libraries/dagster-k8s).
@@ -1 +0,0 @@
1
- KUBERNETES_VERSION_UPPER_BOUND = "33"
@@ -1,20 +0,0 @@
1
- import re
2
-
3
- from dagster import __version__ as dagster_version
4
-
5
-
6
- def sanitize_k8s_label(label_name: str):
7
- # Truncate too long label values to fit into 63-characters limit and avoid invalid characters.
8
- # https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
9
- label_name = label_name[:63]
10
- return re.sub(r"[^a-zA-Z0-9\-_\.]", "-", label_name).strip("-").strip("_").strip(".")
11
-
12
-
13
- def get_common_labels():
14
- # See: https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/
15
- return {
16
- "app.kubernetes.io/name": "dagster",
17
- "app.kubernetes.io/instance": "dagster",
18
- "app.kubernetes.io/version": sanitize_k8s_label(dagster_version),
19
- "app.kubernetes.io/part-of": "dagster",
20
- }
@@ -1 +0,0 @@
1
- __version__ = "0.26.2"
@@ -1,3 +0,0 @@
1
- dagster==1.10.2
2
- kubernetes<33
3
- google-auth!=2.23.1
File without changes
File without changes