prefect-client 2.20.2__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- prefect/__init__.py +74 -110
- prefect/_internal/compatibility/deprecated.py +6 -115
- prefect/_internal/compatibility/experimental.py +4 -79
- prefect/_internal/compatibility/migration.py +166 -0
- prefect/_internal/concurrency/__init__.py +2 -2
- prefect/_internal/concurrency/api.py +1 -35
- prefect/_internal/concurrency/calls.py +0 -6
- prefect/_internal/concurrency/cancellation.py +0 -3
- prefect/_internal/concurrency/event_loop.py +0 -20
- prefect/_internal/concurrency/inspection.py +3 -3
- prefect/_internal/concurrency/primitives.py +1 -0
- prefect/_internal/concurrency/services.py +23 -0
- prefect/_internal/concurrency/threads.py +35 -0
- prefect/_internal/concurrency/waiters.py +0 -28
- prefect/_internal/integrations.py +7 -0
- prefect/_internal/pydantic/__init__.py +0 -45
- prefect/_internal/pydantic/annotations/pendulum.py +2 -2
- prefect/_internal/pydantic/v1_schema.py +21 -22
- prefect/_internal/pydantic/v2_schema.py +0 -2
- prefect/_internal/pydantic/v2_validated_func.py +18 -23
- prefect/_internal/pytz.py +1 -1
- prefect/_internal/retries.py +61 -0
- prefect/_internal/schemas/bases.py +45 -177
- prefect/_internal/schemas/fields.py +1 -43
- prefect/_internal/schemas/validators.py +47 -233
- prefect/agent.py +3 -695
- prefect/artifacts.py +173 -14
- prefect/automations.py +39 -4
- prefect/blocks/abstract.py +1 -1
- prefect/blocks/core.py +423 -164
- prefect/blocks/fields.py +2 -57
- prefect/blocks/notifications.py +43 -28
- prefect/blocks/redis.py +168 -0
- prefect/blocks/system.py +67 -20
- prefect/blocks/webhook.py +2 -9
- prefect/cache_policies.py +239 -0
- prefect/client/__init__.py +4 -0
- prefect/client/base.py +33 -27
- prefect/client/cloud.py +65 -20
- prefect/client/collections.py +1 -1
- prefect/client/orchestration.py +667 -440
- prefect/client/schemas/actions.py +115 -100
- prefect/client/schemas/filters.py +46 -52
- prefect/client/schemas/objects.py +228 -178
- prefect/client/schemas/responses.py +18 -36
- prefect/client/schemas/schedules.py +55 -36
- prefect/client/schemas/sorting.py +2 -0
- prefect/client/subscriptions.py +8 -7
- prefect/client/types/flexible_schedule_list.py +11 -0
- prefect/client/utilities.py +9 -6
- prefect/concurrency/asyncio.py +60 -11
- prefect/concurrency/context.py +24 -0
- prefect/concurrency/events.py +2 -2
- prefect/concurrency/services.py +46 -16
- prefect/concurrency/sync.py +51 -7
- prefect/concurrency/v1/asyncio.py +143 -0
- prefect/concurrency/v1/context.py +27 -0
- prefect/concurrency/v1/events.py +61 -0
- prefect/concurrency/v1/services.py +116 -0
- prefect/concurrency/v1/sync.py +92 -0
- prefect/context.py +246 -149
- prefect/deployments/__init__.py +33 -18
- prefect/deployments/base.py +10 -15
- prefect/deployments/deployments.py +2 -1048
- prefect/deployments/flow_runs.py +178 -0
- prefect/deployments/runner.py +72 -173
- prefect/deployments/schedules.py +31 -25
- prefect/deployments/steps/__init__.py +0 -1
- prefect/deployments/steps/core.py +7 -0
- prefect/deployments/steps/pull.py +15 -21
- prefect/deployments/steps/utility.py +2 -1
- prefect/docker/__init__.py +20 -0
- prefect/docker/docker_image.py +82 -0
- prefect/engine.py +15 -2466
- prefect/events/actions.py +17 -23
- prefect/events/cli/automations.py +20 -7
- prefect/events/clients.py +142 -80
- prefect/events/filters.py +14 -18
- prefect/events/related.py +74 -75
- prefect/events/schemas/__init__.py +0 -5
- prefect/events/schemas/automations.py +55 -46
- prefect/events/schemas/deployment_triggers.py +7 -197
- prefect/events/schemas/events.py +46 -65
- prefect/events/schemas/labelling.py +10 -14
- prefect/events/utilities.py +4 -5
- prefect/events/worker.py +23 -8
- prefect/exceptions.py +15 -0
- prefect/filesystems.py +30 -529
- prefect/flow_engine.py +827 -0
- prefect/flow_runs.py +379 -7
- prefect/flows.py +470 -360
- prefect/futures.py +382 -331
- prefect/infrastructure/__init__.py +5 -26
- prefect/infrastructure/base.py +3 -320
- prefect/infrastructure/provisioners/__init__.py +5 -3
- prefect/infrastructure/provisioners/cloud_run.py +13 -8
- prefect/infrastructure/provisioners/container_instance.py +14 -9
- prefect/infrastructure/provisioners/ecs.py +10 -8
- prefect/infrastructure/provisioners/modal.py +8 -5
- prefect/input/__init__.py +4 -0
- prefect/input/actions.py +2 -4
- prefect/input/run_input.py +9 -9
- prefect/logging/formatters.py +2 -4
- prefect/logging/handlers.py +9 -14
- prefect/logging/loggers.py +5 -5
- prefect/main.py +72 -0
- prefect/plugins.py +2 -64
- prefect/profiles.toml +16 -2
- prefect/records/__init__.py +1 -0
- prefect/records/base.py +223 -0
- prefect/records/filesystem.py +207 -0
- prefect/records/memory.py +178 -0
- prefect/records/result_store.py +64 -0
- prefect/results.py +577 -504
- prefect/runner/runner.py +124 -51
- prefect/runner/server.py +32 -34
- prefect/runner/storage.py +3 -12
- prefect/runner/submit.py +2 -10
- prefect/runner/utils.py +2 -2
- prefect/runtime/__init__.py +1 -0
- prefect/runtime/deployment.py +1 -0
- prefect/runtime/flow_run.py +40 -5
- prefect/runtime/task_run.py +1 -0
- prefect/serializers.py +28 -39
- prefect/server/api/collections_data/views/aggregate-worker-metadata.json +5 -14
- prefect/settings.py +209 -332
- prefect/states.py +160 -63
- prefect/task_engine.py +1478 -57
- prefect/task_runners.py +383 -287
- prefect/task_runs.py +240 -0
- prefect/task_worker.py +463 -0
- prefect/tasks.py +684 -374
- prefect/transactions.py +410 -0
- prefect/types/__init__.py +72 -86
- prefect/types/entrypoint.py +13 -0
- prefect/utilities/annotations.py +4 -3
- prefect/utilities/asyncutils.py +227 -148
- prefect/utilities/callables.py +138 -48
- prefect/utilities/collections.py +134 -86
- prefect/utilities/dispatch.py +27 -14
- prefect/utilities/dockerutils.py +11 -4
- prefect/utilities/engine.py +186 -32
- prefect/utilities/filesystem.py +4 -5
- prefect/utilities/importtools.py +26 -27
- prefect/utilities/pydantic.py +128 -38
- prefect/utilities/schema_tools/hydration.py +18 -1
- prefect/utilities/schema_tools/validation.py +30 -0
- prefect/utilities/services.py +35 -9
- prefect/utilities/templating.py +12 -2
- prefect/utilities/timeout.py +20 -5
- prefect/utilities/urls.py +195 -0
- prefect/utilities/visualization.py +1 -0
- prefect/variables.py +78 -59
- prefect/workers/__init__.py +0 -1
- prefect/workers/base.py +237 -244
- prefect/workers/block.py +5 -226
- prefect/workers/cloud.py +6 -0
- prefect/workers/process.py +265 -12
- prefect/workers/server.py +29 -11
- {prefect_client-2.20.2.dist-info → prefect_client-3.0.0.dist-info}/METADATA +30 -26
- prefect_client-3.0.0.dist-info/RECORD +201 -0
- {prefect_client-2.20.2.dist-info → prefect_client-3.0.0.dist-info}/WHEEL +1 -1
- prefect/_internal/pydantic/_base_model.py +0 -51
- prefect/_internal/pydantic/_compat.py +0 -82
- prefect/_internal/pydantic/_flags.py +0 -20
- prefect/_internal/pydantic/_types.py +0 -8
- prefect/_internal/pydantic/utilities/config_dict.py +0 -72
- prefect/_internal/pydantic/utilities/field_validator.py +0 -150
- prefect/_internal/pydantic/utilities/model_construct.py +0 -56
- prefect/_internal/pydantic/utilities/model_copy.py +0 -55
- prefect/_internal/pydantic/utilities/model_dump.py +0 -136
- prefect/_internal/pydantic/utilities/model_dump_json.py +0 -112
- prefect/_internal/pydantic/utilities/model_fields.py +0 -50
- prefect/_internal/pydantic/utilities/model_fields_set.py +0 -29
- prefect/_internal/pydantic/utilities/model_json_schema.py +0 -82
- prefect/_internal/pydantic/utilities/model_rebuild.py +0 -80
- prefect/_internal/pydantic/utilities/model_validate.py +0 -75
- prefect/_internal/pydantic/utilities/model_validate_json.py +0 -68
- prefect/_internal/pydantic/utilities/model_validator.py +0 -87
- prefect/_internal/pydantic/utilities/type_adapter.py +0 -71
- prefect/_vendor/fastapi/__init__.py +0 -25
- prefect/_vendor/fastapi/applications.py +0 -946
- prefect/_vendor/fastapi/background.py +0 -3
- prefect/_vendor/fastapi/concurrency.py +0 -44
- prefect/_vendor/fastapi/datastructures.py +0 -58
- prefect/_vendor/fastapi/dependencies/__init__.py +0 -0
- prefect/_vendor/fastapi/dependencies/models.py +0 -64
- prefect/_vendor/fastapi/dependencies/utils.py +0 -877
- prefect/_vendor/fastapi/encoders.py +0 -177
- prefect/_vendor/fastapi/exception_handlers.py +0 -40
- prefect/_vendor/fastapi/exceptions.py +0 -46
- prefect/_vendor/fastapi/logger.py +0 -3
- prefect/_vendor/fastapi/middleware/__init__.py +0 -1
- prefect/_vendor/fastapi/middleware/asyncexitstack.py +0 -25
- prefect/_vendor/fastapi/middleware/cors.py +0 -3
- prefect/_vendor/fastapi/middleware/gzip.py +0 -3
- prefect/_vendor/fastapi/middleware/httpsredirect.py +0 -3
- prefect/_vendor/fastapi/middleware/trustedhost.py +0 -3
- prefect/_vendor/fastapi/middleware/wsgi.py +0 -3
- prefect/_vendor/fastapi/openapi/__init__.py +0 -0
- prefect/_vendor/fastapi/openapi/constants.py +0 -2
- prefect/_vendor/fastapi/openapi/docs.py +0 -203
- prefect/_vendor/fastapi/openapi/models.py +0 -480
- prefect/_vendor/fastapi/openapi/utils.py +0 -485
- prefect/_vendor/fastapi/param_functions.py +0 -340
- prefect/_vendor/fastapi/params.py +0 -453
- prefect/_vendor/fastapi/py.typed +0 -0
- prefect/_vendor/fastapi/requests.py +0 -4
- prefect/_vendor/fastapi/responses.py +0 -40
- prefect/_vendor/fastapi/routing.py +0 -1331
- prefect/_vendor/fastapi/security/__init__.py +0 -15
- prefect/_vendor/fastapi/security/api_key.py +0 -98
- prefect/_vendor/fastapi/security/base.py +0 -6
- prefect/_vendor/fastapi/security/http.py +0 -172
- prefect/_vendor/fastapi/security/oauth2.py +0 -227
- prefect/_vendor/fastapi/security/open_id_connect_url.py +0 -34
- prefect/_vendor/fastapi/security/utils.py +0 -10
- prefect/_vendor/fastapi/staticfiles.py +0 -1
- prefect/_vendor/fastapi/templating.py +0 -3
- prefect/_vendor/fastapi/testclient.py +0 -1
- prefect/_vendor/fastapi/types.py +0 -3
- prefect/_vendor/fastapi/utils.py +0 -235
- prefect/_vendor/fastapi/websockets.py +0 -7
- prefect/_vendor/starlette/__init__.py +0 -1
- prefect/_vendor/starlette/_compat.py +0 -28
- prefect/_vendor/starlette/_exception_handler.py +0 -80
- prefect/_vendor/starlette/_utils.py +0 -88
- prefect/_vendor/starlette/applications.py +0 -261
- prefect/_vendor/starlette/authentication.py +0 -159
- prefect/_vendor/starlette/background.py +0 -43
- prefect/_vendor/starlette/concurrency.py +0 -59
- prefect/_vendor/starlette/config.py +0 -151
- prefect/_vendor/starlette/convertors.py +0 -87
- prefect/_vendor/starlette/datastructures.py +0 -707
- prefect/_vendor/starlette/endpoints.py +0 -130
- prefect/_vendor/starlette/exceptions.py +0 -60
- prefect/_vendor/starlette/formparsers.py +0 -276
- prefect/_vendor/starlette/middleware/__init__.py +0 -17
- prefect/_vendor/starlette/middleware/authentication.py +0 -52
- prefect/_vendor/starlette/middleware/base.py +0 -220
- prefect/_vendor/starlette/middleware/cors.py +0 -176
- prefect/_vendor/starlette/middleware/errors.py +0 -265
- prefect/_vendor/starlette/middleware/exceptions.py +0 -74
- prefect/_vendor/starlette/middleware/gzip.py +0 -113
- prefect/_vendor/starlette/middleware/httpsredirect.py +0 -19
- prefect/_vendor/starlette/middleware/sessions.py +0 -82
- prefect/_vendor/starlette/middleware/trustedhost.py +0 -64
- prefect/_vendor/starlette/middleware/wsgi.py +0 -147
- prefect/_vendor/starlette/py.typed +0 -0
- prefect/_vendor/starlette/requests.py +0 -328
- prefect/_vendor/starlette/responses.py +0 -347
- prefect/_vendor/starlette/routing.py +0 -933
- prefect/_vendor/starlette/schemas.py +0 -154
- prefect/_vendor/starlette/staticfiles.py +0 -248
- prefect/_vendor/starlette/status.py +0 -199
- prefect/_vendor/starlette/templating.py +0 -231
- prefect/_vendor/starlette/testclient.py +0 -804
- prefect/_vendor/starlette/types.py +0 -30
- prefect/_vendor/starlette/websockets.py +0 -193
- prefect/blocks/kubernetes.py +0 -119
- prefect/deprecated/__init__.py +0 -0
- prefect/deprecated/data_documents.py +0 -350
- prefect/deprecated/packaging/__init__.py +0 -12
- prefect/deprecated/packaging/base.py +0 -96
- prefect/deprecated/packaging/docker.py +0 -146
- prefect/deprecated/packaging/file.py +0 -92
- prefect/deprecated/packaging/orion.py +0 -80
- prefect/deprecated/packaging/serializers.py +0 -171
- prefect/events/instrument.py +0 -135
- prefect/infrastructure/container.py +0 -824
- prefect/infrastructure/kubernetes.py +0 -920
- prefect/infrastructure/process.py +0 -289
- prefect/manifests.py +0 -20
- prefect/new_flow_engine.py +0 -449
- prefect/new_task_engine.py +0 -423
- prefect/pydantic/__init__.py +0 -76
- prefect/pydantic/main.py +0 -39
- prefect/software/__init__.py +0 -2
- prefect/software/base.py +0 -50
- prefect/software/conda.py +0 -199
- prefect/software/pip.py +0 -122
- prefect/software/python.py +0 -52
- prefect/task_server.py +0 -322
- prefect_client-2.20.2.dist-info/RECORD +0 -294
- /prefect/{_internal/pydantic/utilities → client/types}/__init__.py +0 -0
- /prefect/{_vendor → concurrency/v1}/__init__.py +0 -0
- {prefect_client-2.20.2.dist-info → prefect_client-3.0.0.dist-info}/LICENSE +0 -0
- {prefect_client-2.20.2.dist-info → prefect_client-3.0.0.dist-info}/top_level.txt +0 -0
@@ -1,920 +0,0 @@
-"""
-DEPRECATION WARNING:
-
-This module is deprecated as of March 2024 and will not be available after September 2024.
-It has been replaced by the Kubernetes worker from the prefect-kubernetes package, which offers enhanced functionality and better performance.
-
-For upgrade instructions, see https://docs.prefect.io/latest/guides/upgrade-guide-agents-to-workers/.
-"""
-import copy
-import enum
-import math
-import os
-import shlex
-import time
-from contextlib import contextmanager
-from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Tuple, Union
-
-import anyio.abc
-import yaml
-
-from prefect._internal.compatibility.deprecated import (
-    deprecated_class,
-)
-from prefect._internal.pydantic import HAS_PYDANTIC_V2
-from prefect._internal.schemas.validators import (
-    cast_k8s_job_customizations,
-    set_default_image,
-    set_default_namespace,
-    validate_k8s_job_compatible_values,
-    validate_k8s_job_required_components,
-)
-
-if HAS_PYDANTIC_V2:
-    from pydantic.v1 import Field, root_validator, validator
-else:
-    from pydantic import Field, root_validator, validator
-
-from typing_extensions import Literal
-
-from prefect.blocks.kubernetes import KubernetesClusterConfig
-from prefect.exceptions import InfrastructureNotAvailable, InfrastructureNotFound
-from prefect.infrastructure.base import Infrastructure, InfrastructureResult
-from prefect.utilities.asyncutils import run_sync_in_worker_thread, sync_compatible
-from prefect.utilities.hashing import stable_hash
-from prefect.utilities.importtools import lazy_import
-from prefect.utilities.pydantic import JsonPatch
-from prefect.utilities.slugify import slugify
-
-if TYPE_CHECKING:
-    import kubernetes
-    import kubernetes.client
-    import kubernetes.client.exceptions
-    import kubernetes.config
-    from kubernetes.client import BatchV1Api, CoreV1Api, V1Job, V1Pod
-else:
-    kubernetes = lazy_import("kubernetes")
-
-
-class KubernetesImagePullPolicy(enum.Enum):
-    IF_NOT_PRESENT = "IfNotPresent"
-    ALWAYS = "Always"
-    NEVER = "Never"
-
-
-class KubernetesRestartPolicy(enum.Enum):
-    ON_FAILURE = "OnFailure"
-    NEVER = "Never"
-
-
-KubernetesManifest = Dict[str, Any]
-
-
-class KubernetesJobResult(InfrastructureResult):
-    """Contains information about the final state of a completed Kubernetes Job"""
-
-
-@deprecated_class(
-    start_date="Mar 2024",
-    help="Use the Kubernetes worker from prefect-kubernetes instead."
-    " Refer to the upgrade guide for more information:"
-    " https://docs.prefect.io/latest/guides/upgrade-guide-agents-to-workers/.",
-)
-class KubernetesJob(Infrastructure):
-    """
-    Runs a command as a Kubernetes Job.
-
-    For a guided tutorial, see [How to use Kubernetes with Prefect](https://medium.com/the-prefect-blog/how-to-use-kubernetes-with-prefect-419b2e8b8cb2/).
-    For more information, including examples for customizing the resulting manifest, see [`KubernetesJob` infrastructure concepts](https://docs.prefect.io/concepts/infrastructure/#kubernetesjob).
-
-    Attributes:
-        cluster_config: An optional Kubernetes cluster config to use for this job.
-        command: A list of strings specifying the command to run in the container to
-            start the flow run. In most cases you should not override this.
-        customizations: A list of JSON 6902 patches to apply to the base Job manifest.
-        env: Environment variables to set for the container.
-        finished_job_ttl: The number of seconds to retain jobs after completion. If set, finished jobs will
-            be cleaned up by Kubernetes after the given delay. If None (default), jobs will need to be
-            manually removed.
-        image: An optional string specifying the image reference of a container image
-            to use for the job, for example, docker.io/prefecthq/prefect:2-latest. The
-            behavior is as described in https://kubernetes.io/docs/concepts/containers/images/#image-names.
-            Defaults to the Prefect image.
-        image_pull_policy: The Kubernetes image pull policy to use for job containers.
-        job: The base manifest for the Kubernetes Job.
-        job_watch_timeout_seconds: Number of seconds to wait for the job to complete
-            before marking it as crashed. Defaults to `None`, which means no timeout will be enforced.
-        labels: An optional dictionary of labels to add to the job.
-        name: An optional name for the job.
-        namespace: An optional string signifying the Kubernetes namespace to use.
-        pod_watch_timeout_seconds: Number of seconds to watch for pod creation before timing out (default 60).
-        service_account_name: An optional string specifying which Kubernetes service account to use.
-        stream_output: If set, stream output from the job to local standard output.
-    """
-
-    _logo_url = "https://cdn.sanity.io/images/3ugk85nk/production/2d0b896006ad463b49c28aaac14f31e00e32cfab-250x250.png"
-    _documentation_url = "https://docs.prefect.io/api-ref/prefect/infrastructure/#prefect.infrastructure.KubernetesJob"
-
-    type: Literal["kubernetes-job"] = Field(
-        default="kubernetes-job", description="The type of infrastructure."
-    )
-    # shortcuts for the most common user-serviceable settings
-    image: Optional[str] = Field(
-        default=None,
-        description=(
-            "The image reference of a container image to use for the job, for example,"
-            " `docker.io/prefecthq/prefect:2-latest`.The behavior is as described in"
-            " the Kubernetes documentation and uses the latest version of Prefect by"
-            " default, unless an image is already present in a provided job manifest."
-        ),
-    )
-    namespace: Optional[str] = Field(
-        default=None,
-        description=(
-            "The Kubernetes namespace to use for this job. Defaults to 'default' "
-            "unless a namespace is already present in a provided job manifest."
-        ),
-    )
-    service_account_name: Optional[str] = Field(
-        default=None, description="The Kubernetes service account to use for this job."
-    )
-    image_pull_policy: Optional[KubernetesImagePullPolicy] = Field(
-        default=None,
-        description="The Kubernetes image pull policy to use for job containers.",
-    )
-
-    # connection to a cluster
-    cluster_config: Optional[KubernetesClusterConfig] = Field(
-        default=None, description="The Kubernetes cluster config to use for this job."
-    )
-
-    # settings allowing full customization of the Job
-    job: KubernetesManifest = Field(
-        default_factory=lambda: KubernetesJob.base_job_manifest(),
-        description="The base manifest for the Kubernetes Job.",
-        title="Base Job Manifest",
-    )
-    customizations: JsonPatch = Field(
-        default_factory=lambda: JsonPatch([]),
-        description="A list of JSON 6902 patches to apply to the base Job manifest.",
-    )
-
-    # controls the behavior of execution
-    job_watch_timeout_seconds: Optional[int] = Field(
-        default=None,
-        description=(
-            "Number of seconds to wait for the job to complete before marking it as"
-            " crashed. Defaults to `None`, which means no timeout will be enforced."
-        ),
-    )
-    pod_watch_timeout_seconds: int = Field(
-        default=60,
-        description="Number of seconds to watch for pod creation before timing out.",
-    )
-    stream_output: bool = Field(
-        default=True,
-        description=(
-            "If set, output will be streamed from the job to local standard output."
-        ),
-    )
-    finished_job_ttl: Optional[int] = Field(
-        default=None,
-        description=(
-            "The number of seconds to retain jobs after completion. If set, finished"
-            " jobs will be cleaned up by Kubernetes after the given delay. If None"
-            " (default), jobs will need to be manually removed."
-        ),
-    )
-
-    # internal-use only right now
-    _api_dns_name: Optional[str] = None  # Replaces 'localhost' in API URL
-
-    _block_type_name = "Kubernetes Job"
-
-    @validator("job")
-    def ensure_job_includes_all_required_components(cls, value: KubernetesManifest):
-        return validate_k8s_job_required_components(cls, value)
-
-    @validator("job")
-    def ensure_job_has_compatible_values(cls, value: KubernetesManifest):
-        return validate_k8s_job_compatible_values(cls, value)
-
-    @validator("customizations", pre=True)
-    def cast_customizations_to_a_json_patch(
-        cls, value: Union[List[Dict], JsonPatch, str]
-    ) -> JsonPatch:
-        return cast_k8s_job_customizations(cls, value)
-
-    @root_validator
-    def default_namespace(cls, values):
-        return set_default_namespace(values)
-
-    @root_validator
-    def default_image(cls, values):
-        return set_default_image(values)
-
-    # Support serialization of the 'JsonPatch' type
-    class Config:
-        arbitrary_types_allowed = True
-        json_encoders = {JsonPatch: lambda p: p.patch}
-
-    def dict(self, *args, **kwargs) -> Dict:
-        d = super().dict(*args, **kwargs)
-        d["customizations"] = self.customizations.patch
-        return d
-
-    @classmethod
-    def base_job_manifest(cls) -> KubernetesManifest:
-        """Produces the bare minimum allowed Job manifest"""
-        return {
-            "apiVersion": "batch/v1",
-            "kind": "Job",
-            "metadata": {"labels": {}},
-            "spec": {
-                "template": {
-                    "spec": {
-                        "parallelism": 1,
-                        "completions": 1,
-                        "restartPolicy": "Never",
-                        "containers": [
-                            {
-                                "name": "prefect-job",
-                                "env": [],
-                            }
-                        ],
-                    }
-                }
-            },
-        }
-
-    # Note that we're using the yaml package to load both YAML and JSON files below.
-    # This works because YAML is a strict superset of JSON:
-    #
-    # > The YAML 1.23 specification was published in 2009. Its primary focus was
-    # > making YAML a strict superset of JSON. It also removed many of the problematic
-    # > implicit typing recommendations.
-    #
-    # https://yaml.org/spec/1.2.2/#12-yaml-history
-
-    @classmethod
-    def job_from_file(cls, filename: str) -> KubernetesManifest:
-        """Load a Kubernetes Job manifest from a YAML or JSON file."""
-        with open(filename, "r", encoding="utf-8") as f:
-            return yaml.load(f, yaml.SafeLoader)
-
-    @classmethod
-    def customize_from_file(cls, filename: str) -> JsonPatch:
-        """Load an RFC 6902 JSON patch from a YAML or JSON file."""
-        with open(filename, "r", encoding="utf-8") as f:
-            return JsonPatch(yaml.load(f, yaml.SafeLoader))
-
-    @sync_compatible
-    async def run(
-        self,
-        task_status: Optional[anyio.abc.TaskStatus] = None,
-    ) -> KubernetesJobResult:
-        if not self.command:
-            raise ValueError("Kubernetes job cannot be run with empty command.")
-
-        self._configure_kubernetes_library_client()
-        manifest = self.build_job()
-        job = await run_sync_in_worker_thread(self._create_job, manifest)
-
-        pid = await run_sync_in_worker_thread(self._get_infrastructure_pid, job)
-        # Indicate that the job has started
-        if task_status is not None:
-            task_status.started(pid)
-
-        # Monitor the job until completion
-        status_code = await run_sync_in_worker_thread(
-            self._watch_job, job.metadata.name
-        )
-        return KubernetesJobResult(identifier=pid, status_code=status_code)
-
-    async def kill(self, infrastructure_pid: str, grace_seconds: int = 30):
-        self._configure_kubernetes_library_client()
-        job_cluster_uid, job_namespace, job_name = self._parse_infrastructure_pid(
-            infrastructure_pid
-        )
-
-        if not job_namespace == self.namespace:
-            raise InfrastructureNotAvailable(
-                f"Unable to kill job {job_name!r}: The job is running in namespace "
-                f"{job_namespace!r} but this block is configured to use "
-                f"{self.namespace!r}."
-            )
-
-        current_cluster_uid = self._get_cluster_uid()
-        if job_cluster_uid != current_cluster_uid:
-            raise InfrastructureNotAvailable(
-                f"Unable to kill job {job_name!r}: The job is running on another "
-                "cluster."
-            )
-
-        with self.get_batch_client() as batch_client:
-            try:
-                batch_client.delete_namespaced_job(
-                    name=job_name,
-                    namespace=job_namespace,
-                    grace_period_seconds=grace_seconds,
-                    # Foreground propagation deletes dependent objects before deleting owner objects.
-                    # This ensures that the pods are cleaned up before the job is marked as deleted.
-                    # See: https://kubernetes.io/docs/concepts/architecture/garbage-collection/#foreground-deletion
-                    propagation_policy="Foreground",
-                )
-            except kubernetes.client.exceptions.ApiException as exc:
-                if exc.status == 404:
-                    raise InfrastructureNotFound(
-                        f"Unable to kill job {job_name!r}: The job was not found."
-                    ) from exc
-                else:
-                    raise
-
-    def preview(self):
-        return yaml.dump(self.build_job())
-
-    def get_corresponding_worker_type(self):
-        return "kubernetes"
-
-    async def generate_work_pool_base_job_template(self):
-        from prefect.workers.utilities import (
-            get_default_base_job_template_for_infrastructure_type,
-        )
-
-        base_job_template = await get_default_base_job_template_for_infrastructure_type(
-            self.get_corresponding_worker_type()
-        )
-        assert (
-            base_job_template is not None
-        ), "Failed to retrieve default base job template."
-        for key, value in self.dict(exclude_unset=True, exclude_defaults=True).items():
-            if key == "command":
-                base_job_template["variables"]["properties"]["command"][
-                    "default"
-                ] = shlex.join(value)
-            elif key in [
-                "type",
-                "block_type_slug",
-                "_block_document_id",
-                "_block_document_name",
-                "_is_anonymous",
-                "job",
-                "customizations",
-            ]:
-                continue
-            elif key == "image_pull_policy":
-                base_job_template["variables"]["properties"]["image_pull_policy"][
-                    "default"
-                ] = value.value
-            elif key == "cluster_config":
-                base_job_template["variables"]["properties"]["cluster_config"][
-                    "default"
-                ] = {
-                    "$ref": {
-                        "block_document_id": str(self.cluster_config._block_document_id)
-                    }
-                }
-            elif key in base_job_template["variables"]["properties"]:
-                base_job_template["variables"]["properties"][key]["default"] = value
-            else:
-                self.logger.warning(
-                    f"Variable {key!r} is not supported by Kubernetes work pools."
-                    " Skipping."
-                )
-
-        custom_job_manifest = self.dict(exclude_unset=True, exclude_defaults=True).get(
-            "job"
-        )
-        if custom_job_manifest:
-            job_manifest = self.build_job()
-        else:
-            job_manifest = copy.deepcopy(
-                base_job_template["job_configuration"]["job_manifest"]
-            )
-            job_manifest = self.customizations.apply(job_manifest)
-        base_job_template["job_configuration"]["job_manifest"] = job_manifest
-
-        return base_job_template
-
-    def build_job(self) -> KubernetesManifest:
-        """Builds the Kubernetes Job Manifest"""
-        job_manifest = copy.copy(self.job)
-        job_manifest = self._shortcut_customizations().apply(job_manifest)
-        job_manifest = self.customizations.apply(job_manifest)
-        return job_manifest
-
-    @contextmanager
-    def get_batch_client(self) -> Generator["BatchV1Api", None, None]:
-        with kubernetes.client.ApiClient() as client:
-            try:
-                yield kubernetes.client.BatchV1Api(api_client=client)
-            finally:
-                client.rest_client.pool_manager.clear()
-
-    @contextmanager
-    def get_client(self) -> Generator["CoreV1Api", None, None]:
-        with kubernetes.client.ApiClient() as client:
-            try:
-                yield kubernetes.client.CoreV1Api(api_client=client)
-            finally:
-                client.rest_client.pool_manager.clear()
-
-    def _get_infrastructure_pid(self, job: "V1Job") -> str:
-        """
-        Generates a Kubernetes infrastructure PID.
-
-        The PID is in the format: "<cluster uid>:<namespace>:<job name>".
-        """
-        cluster_uid = self._get_cluster_uid()
-        pid = f"{cluster_uid}:{self.namespace}:{job.metadata.name}"
-        return pid
-
-    def _parse_infrastructure_pid(
-        self, infrastructure_pid: str
-    ) -> Tuple[str, str, str]:
-        """
-        Parse a Kubernetes infrastructure PID into its component parts.
-
-        Returns a cluster UID, namespace, and job name.
-        """
-        cluster_uid, namespace, job_name = infrastructure_pid.split(":", 2)
-        return cluster_uid, namespace, job_name
-
-    def _get_cluster_uid(self) -> str:
-        """
-        Gets a unique id for the current cluster being used.
-
-        There is no real unique identifier for a cluster. However, the `kube-system`
-        namespace is immutable and has a persistence UID that we use instead.
-
-        PREFECT_KUBERNETES_CLUSTER_UID can be set in cases where the `kube-system`
-        namespace cannot be read e.g. when a cluster role cannot be created. If set,
-        this variable will be used and we will not attempt to read the `kube-system`
-        namespace.
-
-        See https://github.com/kubernetes/kubernetes/issues/44954
-        """
-        # Default to an environment variable
-        env_cluster_uid = os.environ.get("PREFECT_KUBERNETES_CLUSTER_UID")
-        if env_cluster_uid:
-            return env_cluster_uid
-
-        # Read the UID from the cluster namespace
-        with self.get_client() as client:
-            namespace = client.read_namespace("kube-system")
-            cluster_uid = namespace.metadata.uid
-
-        return cluster_uid
-
-    def _configure_kubernetes_library_client(self) -> None:
-        """
-        Set the correct kubernetes client configuration.
-
-        WARNING: This action is not threadsafe and may override the configuration
-        specified by another `KubernetesJob` instance.
-        """
-        # TODO: Investigate returning a configured client so calls on other threads
-        # will not invalidate the config needed here
-
-        # if a k8s cluster block is provided to the flow runner, use that
-        if self.cluster_config:
-            self.cluster_config.configure_client()
-        else:
-            # If no block specified, try to load Kubernetes configuration within a cluster. If that doesn't
-            # work, try to load the configuration from the local environment, allowing
-            # any further ConfigExceptions to bubble up.
-            try:
-                kubernetes.config.load_incluster_config()
-            except kubernetes.config.ConfigException:
-                kubernetes.config.load_kube_config()
-
-    def _shortcut_customizations(self) -> JsonPatch:
-        """Produces the JSON 6902 patch for the most commonly used customizations, like
-        image and namespace, which we offer as top-level parameters (with sensible
-        default values)"""
-        shortcuts = []
-
-        if self.namespace:
-            shortcuts.append(
-                {
-                    "op": "add",
-                    "path": "/metadata/namespace",
-                    "value": self.namespace,
-                }
-            )
-
-        if self.image:
-            shortcuts.append(
-                {
-                    "op": "add",
-                    "path": "/spec/template/spec/containers/0/image",
-                    "value": self.image,
-                }
-            )
-
-        shortcuts += [
-            {
-                "op": "add",
-                "path": (
-                    f"/metadata/labels/{self._slugify_label_key(key).replace('/', '~1', 1)}"
-                ),
-                "value": self._slugify_label_value(value),
-            }
-            for key, value in self.labels.items()
-        ]
-
-        shortcuts += [
-            {
-                "op": "add",
-                "path": "/spec/template/spec/containers/0/env/-",
-                "value": {"name": key, "value": value},
-            }
-            for key, value in self._get_environment_variables().items()
-        ]
-
-        if self.image_pull_policy:
-            shortcuts.append(
-                {
-                    "op": "add",
-                    "path": "/spec/template/spec/containers/0/imagePullPolicy",
-                    "value": self.image_pull_policy.value,
-                }
-            )
-
-        if self.service_account_name:
-            shortcuts.append(
-                {
-                    "op": "add",
-                    "path": "/spec/template/spec/serviceAccountName",
-                    "value": self.service_account_name,
-                }
-            )
-
-        if self.finished_job_ttl is not None:
-            shortcuts.append(
-                {
-                    "op": "add",
-                    "path": "/spec/ttlSecondsAfterFinished",
-                    "value": self.finished_job_ttl,
-                }
-            )
-
-        if self.command:
-            shortcuts.append(
-                {
-                    "op": "add",
-                    "path": "/spec/template/spec/containers/0/args",
-                    "value": self.command,
-                }
-            )
-
-        if self.name:
-            shortcuts.append(
-                {
-                    "op": "add",
-                    "path": "/metadata/generateName",
-                    "value": self._slugify_name(self.name) + "-",
-                }
-            )
-        else:
-            # Generate name is required
-            shortcuts.append(
-                {
-                    "op": "add",
-                    "path": "/metadata/generateName",
-                    "value": (
-                        "prefect-job-"
-                        # We generate a name using a hash of the primary job settings
-                        + stable_hash(
-                            *self.command if self.command else "",
-                            *self.env.keys(),
-                            *[v for v in self.env.values() if v is not None],
-                        )
-                        + "-"
-                    ),
-                }
-            )
-
-        return JsonPatch(shortcuts)
-
-    def _get_job(self, job_id: str) -> Optional["V1Job"]:
-        with self.get_batch_client() as batch_client:
-            try:
-                job = batch_client.read_namespaced_job(job_id, self.namespace)
-            except kubernetes.client.exceptions.ApiException:
-                self.logger.error(f"Job {job_id!r} was removed.", exc_info=True)
-                return None
-            return job
-
-    def _get_job_pod(self, job_name: str) -> "V1Pod":
-        """Get the first running pod for a job."""
-
-        # Wait until we find a running pod for the job
-        # if `pod_watch_timeout_seconds` is None, no timeout will be enforced
-        watch = kubernetes.watch.Watch()
-        self.logger.debug(f"Job {job_name!r}: Starting watch for pod start...")
-        last_phase = None
-        with self.get_client() as client:
-            for event in watch.stream(
-                func=client.list_namespaced_pod,
-                namespace=self.namespace,
-                label_selector=f"job-name={job_name}",
-                timeout_seconds=self.pod_watch_timeout_seconds,
-            ):
-                phase = event["object"].status.phase
-                if phase != last_phase:
-                    self.logger.info(f"Job {job_name!r}: Pod has status {phase!r}.")
-
-                if phase != "Pending":
-                    watch.stop()
-                    return event["object"]
-
-                last_phase = phase
-
-        self.logger.error(f"Job {job_name!r}: Pod never started.")
-
-    def _watch_job(self, job_name: str) -> int:
-        """
-        Watch a job.
-
-        Return the final status code of the first container.
-        """
-        self.logger.debug(f"Job {job_name!r}: Monitoring job...")
-
-        job = self._get_job(job_name)
-        if not job:
-            return -1
-
-        pod = self._get_job_pod(job_name)
-        if not pod:
-            return -1
-
-        # Calculate the deadline before streaming output
-        deadline = (
-            (time.monotonic() + self.job_watch_timeout_seconds)
-            if self.job_watch_timeout_seconds is not None
-            else None
-        )
-
-        if self.stream_output:
-            with self.get_client() as client:
-                logs = client.read_namespaced_pod_log(
-                    pod.metadata.name,
-                    self.namespace,
-                    follow=True,
-                    _preload_content=False,
-                    container="prefect-job",
-                )
-                try:
-                    for log in logs.stream():
-                        print(log.decode().rstrip())
-
-                        # Check if we have passed the deadline and should stop streaming
-                        # logs
-                        remaining_time = (
-                            deadline - time.monotonic() if deadline else None
-                        )
-                        if deadline and remaining_time <= 0:
-                            break
-
-                except Exception:
-                    self.logger.warning(
-                        (
-                            "Error occurred while streaming logs - "
-                            "Job will continue to run but logs will "
-                            "no longer be streamed to stdout."
-                        ),
-                        exc_info=True,
-                    )
-
-        with self.get_batch_client() as batch_client:
-            # Check if the job is completed before beginning a watch
-            job = batch_client.read_namespaced_job(
-                name=job_name, namespace=self.namespace
-            )
-            completed = job.status.completion_time is not None
-
-            while not completed:
-                remaining_time = (
-                    math.ceil(deadline - time.monotonic()) if deadline else None
-                )
-                if deadline and remaining_time <= 0:
-                    self.logger.error(
-                        f"Job {job_name!r}: Job did not complete within "
-                        f"timeout of {self.job_watch_timeout_seconds}s."
-                    )
-                    return -1
-
-                watch = kubernetes.watch.Watch()
-                # The kubernetes library will disable retries if the timeout kwarg is
-                # present regardless of the value so we do not pass it unless given
-                # https://github.com/kubernetes-client/python/blob/84f5fea2a3e4b161917aa597bf5e5a1d95e24f5a/kubernetes/base/watch/watch.py#LL160
-                timeout_seconds = (
-                    {"timeout_seconds": remaining_time} if deadline else {}
-                )
-
-                for event in watch.stream(
-                    func=batch_client.list_namespaced_job,
-                    field_selector=f"metadata.name={job_name}",
-                    namespace=self.namespace,
-                    **timeout_seconds,
-                ):
-                    if event["type"] == "DELETED":
-                        self.logger.error(f"Job {job_name!r}: Job has been deleted.")
-                        completed = True
-                    elif event["object"].status.completion_time:
-                        if not event["object"].status.succeeded:
-                            # Job failed, exit while loop and return pod exit code
-                            self.logger.error(f"Job {job_name!r}: Job failed.")
-                        completed = True
-                    # Check if the job has reached its backoff limit
-                    # and stop watching if it has
-                    elif (
-                        event["object"].spec.backoff_limit is not None
-                        and event["object"].status.failed is not None
-                        and event["object"].status.failed
-                        > event["object"].spec.backoff_limit
-                    ):
-                        self.logger.error(
-                            f"Job {job_name!r}: Job reached backoff limit."
-                        )
-                        completed = True
-                    # If the job has no backoff limit, check if it has failed
-                    # and stop watching if it has
-                    elif (
-                        not event["object"].spec.backoff_limit
-                        and event["object"].status.failed
-                    ):
-                        completed = True
-
-                    if completed:
-                        watch.stop()
-                        break
-
-        with self.get_client() as core_client:
-            # Get all pods for the job
-            pods = core_client.list_namespaced_pod(
-                namespace=self.namespace, label_selector=f"job-name={job_name}"
-            )
-            # Get the status for only the most recently used pod
-            pods.items.sort(
-                key=lambda pod: pod.metadata.creation_timestamp, reverse=True
-            )
-            most_recent_pod = pods.items[0] if pods.items else None
-            first_container_status = (
-                most_recent_pod.status.container_statuses[0]
-                if most_recent_pod
-                else None
-            )
-            if not first_container_status:
-                self.logger.error(f"Job {job_name!r}: No pods found for job.")
-                return -1
-
-            # In some cases, such as spot instance evictions, the pod will be forcibly
-            # terminated and not report a status correctly.
-            elif (
-                first_container_status.state is None
-                or first_container_status.state.terminated is None
-                or first_container_status.state.terminated.exit_code is None
-            ):
-                self.logger.error(
-                    f"Could not determine exit code for {job_name!r}."
-                    "Exit code will be reported as -1."
-                    "First container status info did not report an exit code."
-                    f"First container info: {first_container_status}."
-                )
-                return -1
-
-        return first_container_status.state.terminated.exit_code
-
-    def _create_job(self, job_manifest: KubernetesManifest) -> "V1Job":
-        """
-        Given a Kubernetes Job Manifest, create the Job on the configured Kubernetes
-        cluster and return its name.
-        """
-        with self.get_batch_client() as batch_client:
-            job = batch_client.create_namespaced_job(self.namespace, job_manifest)
-        return job
-
-    def _slugify_name(self, name: str) -> str:
-        """
-        Slugify text for use as a name.
-
-        Keeps only alphanumeric characters and dashes, and caps the length
-        of the slug at 45 chars.
-
-        The 45 character length allows room for the k8s utility
-        "generateName" to generate a unique name from the slug while
-        keeping the total length of a name below 63 characters, which is
-        the limit for e.g. label names that follow RFC 1123 (hostnames) and
-        RFC 1035 (domain names).
-
-        Args:
-            name: The name of the job
-
-        Returns:
-            the slugified job name
-        """
-        slug = slugify(
-            name,
-            max_length=45,  # Leave enough space for generateName
-            regex_pattern=r"[^a-zA-Z0-9-]+",
-        )
-
-        # TODO: Handle the case that the name is an empty string after being
-        # slugified.
-
-        return slug
-
-    def _slugify_label_key(self, key: str) -> str:
-        """
-        Slugify text for use as a label key.
-
-        Keys are composed of an optional prefix and name, separated by a slash (/).
-
-        Keeps only alphanumeric characters, dashes, underscores, and periods.
-        Limits the length of the label prefix to 253 characters.
-        Limits the length of the label name to 63 characters.
-
-        See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
-
-        Args:
-            key: The label key
-
-        Returns:
-            The slugified label key
-        """
-        if "/" in key:
-            prefix, name = key.split("/", maxsplit=1)
-        else:
-            prefix = None
-            name = key
-
-        name_slug = (
-            slugify(name, max_length=63, regex_pattern=r"[^a-zA-Z0-9-_.]+").strip(
-                "_-."  # Must start or end with alphanumeric characters
-            )
-            or name
-        )
-        # Fallback to the original if we end up with an empty slug, this will allow
-        # Kubernetes to throw the validation error
-
-        if prefix:
-            prefix_slug = (
-                slugify(
-                    prefix,
-                    max_length=253,
-                    regex_pattern=r"[^a-zA-Z0-9-\.]+",
-                ).strip("_-.")  # Must start or end with alphanumeric characters
-                or prefix
-            )
-
-            return f"{prefix_slug}/{name_slug}"
-
-        return name_slug
-
-    def _slugify_label_value(self, value: str) -> str:
-        """
-        Slugify text for use as a label value.
-
-        Keeps only alphanumeric characters, dashes, underscores, and periods.
-        Limits the total length of label text to below 63 characters.
-
-        See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
-
-        Args:
-            value: The text for the label
-
-        Returns:
-            The slugified value
-        """
-        slug = (
-            slugify(value, max_length=63, regex_pattern=r"[^a-zA-Z0-9-_\.]+").strip(
-                "_-."  # Must start or end with alphanumeric characters
-            )
-            or value
-        )
-        # Fallback to the original if we end up with an empty slug, this will allow
-        # Kubernetes to throw the validation error
-
-        return slug
-
-    def _get_environment_variables(self):
-        # If the API URL has been set by the base environment rather than the by the
-        # user, update the value to ensure connectivity when using a bridge network by
-        # updating local connections to use the internal host
-        env = {**self._base_environment(), **self.env}
-
-        if (
-            "PREFECT_API_URL" in env
-            and "PREFECT_API_URL" not in self.env
-            and self._api_dns_name
-        ):
-            env["PREFECT_API_URL"] = (
-                env["PREFECT_API_URL"]
-                .replace("localhost", self._api_dns_name)
-                .replace("127.0.0.1", self._api_dns_name)
-            )
-
-        # Drop null values allowing users to "unset" variables
-        return {key: value for key, value in env.items() if value is not None}