flyte 0.1.0__py3-none-any.whl → 0.2.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic. Click here for more details.
- flyte/__init__.py +78 -2
- flyte/_bin/__init__.py +0 -0
- flyte/_bin/runtime.py +152 -0
- flyte/_build.py +26 -0
- flyte/_cache/__init__.py +12 -0
- flyte/_cache/cache.py +145 -0
- flyte/_cache/defaults.py +9 -0
- flyte/_cache/policy_function_body.py +42 -0
- flyte/_code_bundle/__init__.py +8 -0
- flyte/_code_bundle/_ignore.py +113 -0
- flyte/_code_bundle/_packaging.py +187 -0
- flyte/_code_bundle/_utils.py +323 -0
- flyte/_code_bundle/bundle.py +209 -0
- flyte/_context.py +152 -0
- flyte/_deploy.py +243 -0
- flyte/_doc.py +29 -0
- flyte/_docstring.py +32 -0
- flyte/_environment.py +84 -0
- flyte/_excepthook.py +37 -0
- flyte/_group.py +32 -0
- flyte/_hash.py +23 -0
- flyte/_image.py +762 -0
- flyte/_initialize.py +492 -0
- flyte/_interface.py +84 -0
- flyte/_internal/__init__.py +3 -0
- flyte/_internal/controllers/__init__.py +128 -0
- flyte/_internal/controllers/_local_controller.py +193 -0
- flyte/_internal/controllers/_trace.py +41 -0
- flyte/_internal/controllers/remote/__init__.py +60 -0
- flyte/_internal/controllers/remote/_action.py +146 -0
- flyte/_internal/controllers/remote/_client.py +47 -0
- flyte/_internal/controllers/remote/_controller.py +494 -0
- flyte/_internal/controllers/remote/_core.py +410 -0
- flyte/_internal/controllers/remote/_informer.py +361 -0
- flyte/_internal/controllers/remote/_service_protocol.py +50 -0
- flyte/_internal/imagebuild/__init__.py +11 -0
- flyte/_internal/imagebuild/docker_builder.py +427 -0
- flyte/_internal/imagebuild/image_builder.py +246 -0
- flyte/_internal/imagebuild/remote_builder.py +0 -0
- flyte/_internal/resolvers/__init__.py +0 -0
- flyte/_internal/resolvers/_task_module.py +54 -0
- flyte/_internal/resolvers/common.py +31 -0
- flyte/_internal/resolvers/default.py +28 -0
- flyte/_internal/runtime/__init__.py +0 -0
- flyte/_internal/runtime/convert.py +342 -0
- flyte/_internal/runtime/entrypoints.py +135 -0
- flyte/_internal/runtime/io.py +136 -0
- flyte/_internal/runtime/resources_serde.py +138 -0
- flyte/_internal/runtime/task_serde.py +330 -0
- flyte/_internal/runtime/taskrunner.py +191 -0
- flyte/_internal/runtime/types_serde.py +54 -0
- flyte/_logging.py +135 -0
- flyte/_map.py +215 -0
- flyte/_pod.py +19 -0
- flyte/_protos/__init__.py +0 -0
- flyte/_protos/common/authorization_pb2.py +66 -0
- flyte/_protos/common/authorization_pb2.pyi +108 -0
- flyte/_protos/common/authorization_pb2_grpc.py +4 -0
- flyte/_protos/common/identifier_pb2.py +71 -0
- flyte/_protos/common/identifier_pb2.pyi +82 -0
- flyte/_protos/common/identifier_pb2_grpc.py +4 -0
- flyte/_protos/common/identity_pb2.py +48 -0
- flyte/_protos/common/identity_pb2.pyi +72 -0
- flyte/_protos/common/identity_pb2_grpc.py +4 -0
- flyte/_protos/common/list_pb2.py +36 -0
- flyte/_protos/common/list_pb2.pyi +71 -0
- flyte/_protos/common/list_pb2_grpc.py +4 -0
- flyte/_protos/common/policy_pb2.py +37 -0
- flyte/_protos/common/policy_pb2.pyi +27 -0
- flyte/_protos/common/policy_pb2_grpc.py +4 -0
- flyte/_protos/common/role_pb2.py +37 -0
- flyte/_protos/common/role_pb2.pyi +53 -0
- flyte/_protos/common/role_pb2_grpc.py +4 -0
- flyte/_protos/common/runtime_version_pb2.py +28 -0
- flyte/_protos/common/runtime_version_pb2.pyi +24 -0
- flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
- flyte/_protos/logs/dataplane/payload_pb2.py +100 -0
- flyte/_protos/logs/dataplane/payload_pb2.pyi +177 -0
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/definition_pb2.py +49 -0
- flyte/_protos/secret/definition_pb2.pyi +93 -0
- flyte/_protos/secret/definition_pb2_grpc.py +4 -0
- flyte/_protos/secret/payload_pb2.py +62 -0
- flyte/_protos/secret/payload_pb2.pyi +94 -0
- flyte/_protos/secret/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/secret_pb2.py +38 -0
- flyte/_protos/secret/secret_pb2.pyi +6 -0
- flyte/_protos/secret/secret_pb2_grpc.py +198 -0
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
- flyte/_protos/validate/validate/validate_pb2.py +76 -0
- flyte/_protos/workflow/common_pb2.py +27 -0
- flyte/_protos/workflow/common_pb2.pyi +14 -0
- flyte/_protos/workflow/common_pb2_grpc.py +4 -0
- flyte/_protos/workflow/environment_pb2.py +29 -0
- flyte/_protos/workflow/environment_pb2.pyi +12 -0
- flyte/_protos/workflow/environment_pb2_grpc.py +4 -0
- flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
- flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- flyte/_protos/workflow/queue_service_pb2.py +105 -0
- flyte/_protos/workflow/queue_service_pb2.pyi +146 -0
- flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- flyte/_protos/workflow/run_definition_pb2.py +128 -0
- flyte/_protos/workflow/run_definition_pb2.pyi +314 -0
- flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
- flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- flyte/_protos/workflow/run_service_pb2.py +129 -0
- flyte/_protos/workflow/run_service_pb2.pyi +171 -0
- flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
- flyte/_protos/workflow/state_service_pb2.py +66 -0
- flyte/_protos/workflow/state_service_pb2.pyi +75 -0
- flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
- flyte/_protos/workflow/task_definition_pb2.py +79 -0
- flyte/_protos/workflow/task_definition_pb2.pyi +81 -0
- flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/task_service_pb2.py +60 -0
- flyte/_protos/workflow/task_service_pb2.pyi +59 -0
- flyte/_protos/workflow/task_service_pb2_grpc.py +138 -0
- flyte/_resources.py +226 -0
- flyte/_retry.py +32 -0
- flyte/_reusable_environment.py +25 -0
- flyte/_run.py +482 -0
- flyte/_secret.py +61 -0
- flyte/_task.py +449 -0
- flyte/_task_environment.py +183 -0
- flyte/_timeout.py +47 -0
- flyte/_tools.py +27 -0
- flyte/_trace.py +120 -0
- flyte/_utils/__init__.py +26 -0
- flyte/_utils/asyn.py +119 -0
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +23 -0
- flyte/_utils/file_handling.py +72 -0
- flyte/_utils/helpers.py +134 -0
- flyte/_utils/lazy_module.py +54 -0
- flyte/_utils/org_discovery.py +57 -0
- flyte/_utils/uv_script_parser.py +49 -0
- flyte/_version.py +21 -0
- flyte/cli/__init__.py +3 -0
- flyte/cli/_abort.py +28 -0
- flyte/cli/_common.py +337 -0
- flyte/cli/_create.py +145 -0
- flyte/cli/_delete.py +23 -0
- flyte/cli/_deploy.py +152 -0
- flyte/cli/_gen.py +163 -0
- flyte/cli/_get.py +310 -0
- flyte/cli/_params.py +538 -0
- flyte/cli/_run.py +231 -0
- flyte/cli/main.py +166 -0
- flyte/config/__init__.py +3 -0
- flyte/config/_config.py +216 -0
- flyte/config/_internal.py +64 -0
- flyte/config/_reader.py +207 -0
- flyte/connectors/__init__.py +0 -0
- flyte/errors.py +172 -0
- flyte/extras/__init__.py +5 -0
- flyte/extras/_container.py +263 -0
- flyte/io/__init__.py +27 -0
- flyte/io/_dir.py +448 -0
- flyte/io/_file.py +467 -0
- flyte/io/_structured_dataset/__init__.py +129 -0
- flyte/io/_structured_dataset/basic_dfs.py +219 -0
- flyte/io/_structured_dataset/structured_dataset.py +1061 -0
- flyte/models.py +391 -0
- flyte/remote/__init__.py +26 -0
- flyte/remote/_client/__init__.py +0 -0
- flyte/remote/_client/_protocols.py +133 -0
- flyte/remote/_client/auth/__init__.py +12 -0
- flyte/remote/_client/auth/_auth_utils.py +14 -0
- flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
- flyte/remote/_client/auth/_authenticators/base.py +397 -0
- flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
- flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
- flyte/remote/_client/auth/_authenticators/factory.py +200 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
- flyte/remote/_client/auth/_channel.py +215 -0
- flyte/remote/_client/auth/_client_config.py +83 -0
- flyte/remote/_client/auth/_default_html.py +32 -0
- flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
- flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
- flyte/remote/_client/auth/_keyring.py +143 -0
- flyte/remote/_client/auth/_token_client.py +260 -0
- flyte/remote/_client/auth/errors.py +16 -0
- flyte/remote/_client/controlplane.py +95 -0
- flyte/remote/_console.py +18 -0
- flyte/remote/_data.py +159 -0
- flyte/remote/_logs.py +176 -0
- flyte/remote/_project.py +85 -0
- flyte/remote/_run.py +970 -0
- flyte/remote/_secret.py +132 -0
- flyte/remote/_task.py +391 -0
- flyte/report/__init__.py +3 -0
- flyte/report/_report.py +178 -0
- flyte/report/_template.html +124 -0
- flyte/storage/__init__.py +29 -0
- flyte/storage/_config.py +233 -0
- flyte/storage/_remote_fs.py +34 -0
- flyte/storage/_storage.py +271 -0
- flyte/storage/_utils.py +5 -0
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +371 -0
- flyte/types/__init__.py +36 -0
- flyte/types/_interface.py +40 -0
- flyte/types/_pickle.py +118 -0
- flyte/types/_renderer.py +162 -0
- flyte/types/_string_literals.py +120 -0
- flyte/types/_type_engine.py +2287 -0
- flyte/types/_utils.py +80 -0
- flyte-0.2.0a0.dist-info/METADATA +249 -0
- flyte-0.2.0a0.dist-info/RECORD +218 -0
- {flyte-0.1.0.dist-info → flyte-0.2.0a0.dist-info}/WHEEL +2 -1
- flyte-0.2.0a0.dist-info/entry_points.txt +3 -0
- flyte-0.2.0a0.dist-info/top_level.txt +1 -0
- flyte-0.1.0.dist-info/METADATA +0 -6
- flyte-0.1.0.dist-info/RECORD +0 -5
flyte/_resources.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Literal, Optional, Tuple, Union, get_args
|
|
3
|
+
|
|
4
|
+
import rich.repr
|
|
5
|
+
|
|
6
|
+
# GPU device names accepted by GPU(); each one also appears as the prefix of the Accelerators strings below.
GPUType = Literal["T4", "A100", "A100 80G", "H100", "L4", "L40s"]
# Annotation used for the `quantity` argument of GPU().
GPUQuantity = Literal[1, 2, 3, 4, 5, 6, 7, 8]
A100Parts = Literal["1g.5gb", "2g.10gb", "3g.20gb", "4g.20gb", "7g.40gb"]
"""
Partitions for NVIDIA A100 GPU.
"""

A100_80GBParts = Literal["1g.10gb", "2g.20gb", "3g.40gb", "4g.40gb", "7g.80gb"]
"""
Partitions for NVIDIA A100 80GB GPU.
"""

# TPU device names accepted by TPU().
# NOTE(review): "V5E" is not listed here even though V5EParts is defined below and TPU() has a
# "V5E" branch - confirm whether V5E is meant to be supported.
TPUType = Literal["V5P", "V6E"]
V5EParts = Literal["1x1", "2x2", "2x4", "4x4", "4x8", "8x8", "8x16", "16x16"]

V5PParts = Literal[
    "2x2x1", "2x2x2", "2x4x4", "4x4x4", "4x4x8", "4x8x8", "8x8x8", "8x8x16", "8x16x16", "16x16x16", "16x16x24"
]
"""
Slices for Google Cloud TPU v5p.
"""

V6EParts = Literal["1x1", "2x2", "2x4", "4x4", "4x8", "8x8", "8x16", "16x16"]
"""
Slices for Google Cloud TPU v6e.
"""

# Shorthand "<device>:<count>" strings accepted by Resources.gpu: every GPUType combined with a
# count from 1 to 8. Resources.get_device() splits these on ":" to build a Device.
Accelerators = Literal[
    "T4:1",
    "T4:2",
    "T4:3",
    "T4:4",
    "T4:5",
    "T4:6",
    "T4:7",
    "T4:8",
    "L4:1",
    "L4:2",
    "L4:3",
    "L4:4",
    "L4:5",
    "L4:6",
    "L4:7",
    "L4:8",
    "L40s:1",
    "L40s:2",
    "L40s:3",
    "L40s:4",
    "L40s:5",
    "L40s:6",
    "L40s:7",
    "L40s:8",
    "A100:1",
    "A100:2",
    "A100:3",
    "A100:4",
    "A100:5",
    "A100:6",
    "A100:7",
    "A100:8",
    "A100 80G:1",
    "A100 80G:2",
    "A100 80G:3",
    "A100 80G:4",
    "A100 80G:5",
    "A100 80G:6",
    "A100 80G:7",
    "A100 80G:8",
    "H100:1",
    "H100:2",
    "H100:3",
    "H100:4",
    "H100:5",
    "H100:6",
    "H100:7",
    "H100:8",
]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@rich.repr.auto
@dataclass(frozen=True, slots=True)
class Device:
    """
    Represents a device type, its quantity and partition if applicable.

    Instances are immutable (frozen dataclass). Note that ``quantity`` is the first positional
    field, ahead of ``device`` and ``partition``.

    :param quantity: The number of devices of this type. Must be at least 1.
    :param device: The type of device (e.g., "T4", "A100"), or None when only a count is known.
    :param partition: The partition of the device (e.g., "1g.5gb", "2g.10gb" for gpus) or ("1x1", ... for tpus).
    :raises ValueError: If ``quantity`` is less than 1.
    """

    quantity: int
    device: str | None = None
    partition: str | None = None

    def __post_init__(self):
        # The message says "GPU", but this check runs for every Device, including those built by TPU().
        if self.quantity < 1:
            raise ValueError("GPU quantity must be at least 1")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def GPU(device: GPUType, quantity: GPUQuantity, partition: A100Parts | A100_80GBParts | None = None) -> Device:
    """
    Build a :class:`Device` describing one or more GPUs.

    The partition is only validated for the "A100" and "A100 80G" device types; for any other
    GPU type a supplied partition is passed through unchecked.

    :param device: The GPU model; must be one of the values in ``GPUType``.
    :param quantity: How many GPUs to request; must be at least 1.
    :param partition: Optional GPU partition (e.g., "1g.5gb" for A100, "1g.10gb" for A100 80G).
    :return: Device instance.
    :raises ValueError: If the quantity is below 1, the device type is unknown, or the partition
        is not valid for an A100 / A100 80G device.
    """
    if quantity < 1:
        raise ValueError("GPU quantity must be at least 1")
    if device not in get_args(GPUType):
        raise ValueError(f"Invalid GPU type: {device}. Must be one of {get_args(GPUType)}")

    # Partition validation applies only when a partition was actually requested.
    if partition is not None:
        if device == "A100":
            if partition not in get_args(A100Parts):
                raise ValueError(f"Invalid partition for A100: {partition}. Must be one of {get_args(A100Parts)}")
        elif device == "A100 80G":
            if partition not in get_args(A100_80GBParts):
                raise ValueError(f"Invalid partition for A100 80G: {partition}. Must be one of {get_args(A100_80GBParts)}")

    return Device(quantity, device, partition)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def TPU(device: TPUType, partition: V5PParts | V6EParts | None = None):
    """
    Build a :class:`Device` describing a TPU.

    The returned Device always has a quantity of 1.

    :param device: The TPU model; must be one of the values in ``TPUType``.
    :param partition: Optional TPU slice (e.g., "2x2x1" for V5P, "1x1" for V6E).
    :return: Device instance.
    :raises ValueError: If the device type is unknown or the partition is not valid for it.
    """
    if device not in get_args(TPUType):
        raise ValueError(f"Invalid TPU type: {device}. Must be one of {get_args(TPUType)}")

    # Partition validation applies only when a partition was actually requested.
    if partition is not None:
        if device == "V5P":
            if partition not in get_args(V5PParts):
                raise ValueError(f"Invalid partition for V5P: {partition}. Must be one of {get_args(V5PParts)}")
        elif device == "V6E":
            if partition not in get_args(V6EParts):
                raise ValueError(f"Invalid partition for V6E: {partition}. Must be one of {get_args(V6EParts)}")
        elif device == "V5E":
            # NOTE(review): "V5E" is not part of TPUType, so the device check above rejects it
            # before this branch can run - confirm whether V5E should be added to TPUType.
            if partition not in get_args(V5EParts):
                raise ValueError(f"Invalid partition for V5E: {partition}. Must be one of {get_args(V5EParts)}")

    return Device(quantity=1, device=device, partition=partition)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# Scalar forms accepted for a CPU amount; Resources.cpu takes one of these or a 2-tuple of them.
CPUBaseType = int | float | str
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dataclass
class Resources:
    """
    Resources such as CPU, Memory, and GPU that can be allocated to a task.

    Example:
    - Single CPU, 1GiB of memory, and 1 T4 GPU:
    ```python
    @task(resources=Resources(cpu=1, memory="1GiB", gpu="T4:1"))
    def my_task() -> int:
        return 42
    ```
    - 1CPU with limit up to 2CPU, 2GiB of memory, and 8 A100 GPUs and 10GiB of disk:
    ```python
    @task(resources=Resources(cpu=(1, 2), memory="2GiB", gpu="A100:8", disk="10GiB"))
    def my_task() -> int:
        return 42
    ```

    :param cpu: The amount of CPU to allocate to the task. This can be an int, float or string, or a tuple of
        exactly two such values. A bare int or float must not be negative.
    :param memory: The amount of memory to allocate to the task. This can be a string, or a tuple of exactly
        two strings.
    :param gpu: The GPUs to allocate to the task. This can be one of the ``Accelerators`` strings
        (e.g., "T4:1"), a non-negative int count, a ``Device`` instance, or None.
    :param disk: The amount of disk to allocate to the task. This is a string of the form "10GiB".
    :param shm: The shared memory to allocate to the task, as a string, the literal "auto", or None.
    :raises ValueError: If a cpu or memory tuple does not have exactly two elements, a numeric cpu or an
        int gpu is negative, or a gpu string is not one of ``Accelerators``.
    """

    cpu: Union[CPUBaseType, Tuple[CPUBaseType, CPUBaseType], None] = None
    memory: Union[str, Tuple[str, str], None] = None
    gpu: Union[Accelerators, int, Device, None] = None
    disk: Union[str, None] = None
    shm: Union[str, Literal["auto"], None] = None

    def __post_init__(self):
        if isinstance(self.cpu, tuple):
            if len(self.cpu) != 2:
                raise ValueError("cpu tuple must have exactly two elements")
        if isinstance(self.memory, tuple):
            if len(self.memory) != 2:
                raise ValueError("memory tuple must have exactly two elements")
        if isinstance(self.cpu, (int, float)):
            if self.cpu < 0:
                raise ValueError("cpu must be greater than or equal to 0")
        if self.gpu is not None:
            if isinstance(self.gpu, int):
                if self.gpu < 0:
                    raise ValueError("gpu must be greater than or equal to 0")
            elif isinstance(self.gpu, str):
                if self.gpu not in get_args(Accelerators):
                    raise ValueError(f"gpu must be one of {Accelerators}")

    def get_device(self) -> Optional[Device]:
        """
        Get the requested accelerator as a Device.

        :return: None if no GPU is requested (``gpu`` is None or the int 0). For a positive int, a Device
            with only the quantity set. For an ``Accelerators`` string such as "A100:8", a Device with the
            device name and quantity parsed from it. If ``gpu`` is already a Device, it is returned as is.
        """
        if self.gpu is None:
            return None
        if isinstance(self.gpu, int):
            # A count of 0 passes validation in __post_init__, but Device rejects quantities
            # below 1, so treat it as "no device requested" instead of raising here.
            if self.gpu == 0:
                return None
            return Device(quantity=self.gpu)
        if isinstance(self.gpu, str):
            device, portion = self.gpu.split(":")
            return Device(device=device, quantity=int(portion))
        return self.gpu

    def get_shared_memory(self) -> Optional[str]:
        """
        Get the shared memory string for the task.

        :return: None if ``shm`` is not set, an empty string if ``shm`` is "auto", otherwise the ``shm``
            value unchanged.
        """
        if self.shm is None:
            return None
        if self.shm == "auto":
            return ""
        return self.shm
|
flyte/_retry.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from datetime import timedelta
|
|
3
|
+
from typing import Union
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class RetryStrategy:
    """
    Retry strategy for the task or task environment. Retry strategy is optional or can be a simple number of retries.

    Example:
    - This will retry the task 5 times.
    ```
    @task(retries=5)
    def my_task():
        pass
    ```
    - This will retry the task 5 times with a maximum backoff of 10 seconds and a backoff factor of 2.
    ```
    @task(retries=RetryStrategy(count=5, backoff=10, backoff_factor=2))
    def my_task():
        pass
    ```

    :param count: The number of retries.
    :param backoff: The maximum backoff time for retries. This can be a float or a timedelta.
    :param backoff_factor: The backoff exponential factor. This can be an integer or a float.
    """

    count: int
    backoff: Union[float, timedelta, None] = None
    backoff_factor: Union[int, float, None] = None
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from datetime import timedelta
|
|
3
|
+
from typing import Optional, Tuple, Union
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class ReusePolicy:
    """
    ReusePolicy can be used to configure a task to reuse the environment. This is useful when the environment creation
    is expensive and the runtime of the task is short. The environment will be reused for the next invocation of the
    task; even the python process may be reused by subsequent task invocations. A good mental model is to think of
    the environment as a container that is reused for multiple tasks, more like a long-running service.

    Caution: It is important to note that the environment is shared, so managing memory and resources is important.

    :param replicas: Either a single int representing the number of replicas or a tuple of two ints representing
        the min and max number of replicas. Defaults to 1.
    :param idle_ttl: The maximum idle duration for an environment replica, specified as either seconds (int) or a
        timedelta. If not set, the environment's global default will be used.
        When a replica remains idle — meaning no tasks are running — for this duration, it will be automatically
        terminated.
    """

    replicas: Union[int, Tuple[int, int]] = 1
    idle_ttl: Optional[Union[int, timedelta]] = None
|