flyte 0.1.0__py3-none-any.whl → 0.2.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic. Click here for more details.
- flyte/__init__.py +78 -2
- flyte/_bin/__init__.py +0 -0
- flyte/_bin/runtime.py +152 -0
- flyte/_build.py +26 -0
- flyte/_cache/__init__.py +12 -0
- flyte/_cache/cache.py +145 -0
- flyte/_cache/defaults.py +9 -0
- flyte/_cache/policy_function_body.py +42 -0
- flyte/_code_bundle/__init__.py +8 -0
- flyte/_code_bundle/_ignore.py +113 -0
- flyte/_code_bundle/_packaging.py +187 -0
- flyte/_code_bundle/_utils.py +323 -0
- flyte/_code_bundle/bundle.py +209 -0
- flyte/_context.py +152 -0
- flyte/_deploy.py +243 -0
- flyte/_doc.py +29 -0
- flyte/_docstring.py +32 -0
- flyte/_environment.py +84 -0
- flyte/_excepthook.py +37 -0
- flyte/_group.py +32 -0
- flyte/_hash.py +23 -0
- flyte/_image.py +762 -0
- flyte/_initialize.py +492 -0
- flyte/_interface.py +84 -0
- flyte/_internal/__init__.py +3 -0
- flyte/_internal/controllers/__init__.py +128 -0
- flyte/_internal/controllers/_local_controller.py +193 -0
- flyte/_internal/controllers/_trace.py +41 -0
- flyte/_internal/controllers/remote/__init__.py +60 -0
- flyte/_internal/controllers/remote/_action.py +146 -0
- flyte/_internal/controllers/remote/_client.py +47 -0
- flyte/_internal/controllers/remote/_controller.py +494 -0
- flyte/_internal/controllers/remote/_core.py +410 -0
- flyte/_internal/controllers/remote/_informer.py +361 -0
- flyte/_internal/controllers/remote/_service_protocol.py +50 -0
- flyte/_internal/imagebuild/__init__.py +11 -0
- flyte/_internal/imagebuild/docker_builder.py +427 -0
- flyte/_internal/imagebuild/image_builder.py +246 -0
- flyte/_internal/imagebuild/remote_builder.py +0 -0
- flyte/_internal/resolvers/__init__.py +0 -0
- flyte/_internal/resolvers/_task_module.py +54 -0
- flyte/_internal/resolvers/common.py +31 -0
- flyte/_internal/resolvers/default.py +28 -0
- flyte/_internal/runtime/__init__.py +0 -0
- flyte/_internal/runtime/convert.py +342 -0
- flyte/_internal/runtime/entrypoints.py +135 -0
- flyte/_internal/runtime/io.py +136 -0
- flyte/_internal/runtime/resources_serde.py +138 -0
- flyte/_internal/runtime/task_serde.py +330 -0
- flyte/_internal/runtime/taskrunner.py +191 -0
- flyte/_internal/runtime/types_serde.py +54 -0
- flyte/_logging.py +135 -0
- flyte/_map.py +215 -0
- flyte/_pod.py +19 -0
- flyte/_protos/__init__.py +0 -0
- flyte/_protos/common/authorization_pb2.py +66 -0
- flyte/_protos/common/authorization_pb2.pyi +108 -0
- flyte/_protos/common/authorization_pb2_grpc.py +4 -0
- flyte/_protos/common/identifier_pb2.py +71 -0
- flyte/_protos/common/identifier_pb2.pyi +82 -0
- flyte/_protos/common/identifier_pb2_grpc.py +4 -0
- flyte/_protos/common/identity_pb2.py +48 -0
- flyte/_protos/common/identity_pb2.pyi +72 -0
- flyte/_protos/common/identity_pb2_grpc.py +4 -0
- flyte/_protos/common/list_pb2.py +36 -0
- flyte/_protos/common/list_pb2.pyi +71 -0
- flyte/_protos/common/list_pb2_grpc.py +4 -0
- flyte/_protos/common/policy_pb2.py +37 -0
- flyte/_protos/common/policy_pb2.pyi +27 -0
- flyte/_protos/common/policy_pb2_grpc.py +4 -0
- flyte/_protos/common/role_pb2.py +37 -0
- flyte/_protos/common/role_pb2.pyi +53 -0
- flyte/_protos/common/role_pb2_grpc.py +4 -0
- flyte/_protos/common/runtime_version_pb2.py +28 -0
- flyte/_protos/common/runtime_version_pb2.pyi +24 -0
- flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
- flyte/_protos/logs/dataplane/payload_pb2.py +100 -0
- flyte/_protos/logs/dataplane/payload_pb2.pyi +177 -0
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/definition_pb2.py +49 -0
- flyte/_protos/secret/definition_pb2.pyi +93 -0
- flyte/_protos/secret/definition_pb2_grpc.py +4 -0
- flyte/_protos/secret/payload_pb2.py +62 -0
- flyte/_protos/secret/payload_pb2.pyi +94 -0
- flyte/_protos/secret/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/secret_pb2.py +38 -0
- flyte/_protos/secret/secret_pb2.pyi +6 -0
- flyte/_protos/secret/secret_pb2_grpc.py +198 -0
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
- flyte/_protos/validate/validate/validate_pb2.py +76 -0
- flyte/_protos/workflow/common_pb2.py +27 -0
- flyte/_protos/workflow/common_pb2.pyi +14 -0
- flyte/_protos/workflow/common_pb2_grpc.py +4 -0
- flyte/_protos/workflow/environment_pb2.py +29 -0
- flyte/_protos/workflow/environment_pb2.pyi +12 -0
- flyte/_protos/workflow/environment_pb2_grpc.py +4 -0
- flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
- flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- flyte/_protos/workflow/queue_service_pb2.py +105 -0
- flyte/_protos/workflow/queue_service_pb2.pyi +146 -0
- flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- flyte/_protos/workflow/run_definition_pb2.py +128 -0
- flyte/_protos/workflow/run_definition_pb2.pyi +314 -0
- flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
- flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- flyte/_protos/workflow/run_service_pb2.py +129 -0
- flyte/_protos/workflow/run_service_pb2.pyi +171 -0
- flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
- flyte/_protos/workflow/state_service_pb2.py +66 -0
- flyte/_protos/workflow/state_service_pb2.pyi +75 -0
- flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
- flyte/_protos/workflow/task_definition_pb2.py +79 -0
- flyte/_protos/workflow/task_definition_pb2.pyi +81 -0
- flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/task_service_pb2.py +60 -0
- flyte/_protos/workflow/task_service_pb2.pyi +59 -0
- flyte/_protos/workflow/task_service_pb2_grpc.py +138 -0
- flyte/_resources.py +226 -0
- flyte/_retry.py +32 -0
- flyte/_reusable_environment.py +25 -0
- flyte/_run.py +482 -0
- flyte/_secret.py +61 -0
- flyte/_task.py +449 -0
- flyte/_task_environment.py +183 -0
- flyte/_timeout.py +47 -0
- flyte/_tools.py +27 -0
- flyte/_trace.py +120 -0
- flyte/_utils/__init__.py +26 -0
- flyte/_utils/asyn.py +119 -0
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +23 -0
- flyte/_utils/file_handling.py +72 -0
- flyte/_utils/helpers.py +134 -0
- flyte/_utils/lazy_module.py +54 -0
- flyte/_utils/org_discovery.py +57 -0
- flyte/_utils/uv_script_parser.py +49 -0
- flyte/_version.py +21 -0
- flyte/cli/__init__.py +3 -0
- flyte/cli/_abort.py +28 -0
- flyte/cli/_common.py +337 -0
- flyte/cli/_create.py +145 -0
- flyte/cli/_delete.py +23 -0
- flyte/cli/_deploy.py +152 -0
- flyte/cli/_gen.py +163 -0
- flyte/cli/_get.py +310 -0
- flyte/cli/_params.py +538 -0
- flyte/cli/_run.py +231 -0
- flyte/cli/main.py +166 -0
- flyte/config/__init__.py +3 -0
- flyte/config/_config.py +216 -0
- flyte/config/_internal.py +64 -0
- flyte/config/_reader.py +207 -0
- flyte/connectors/__init__.py +0 -0
- flyte/errors.py +172 -0
- flyte/extras/__init__.py +5 -0
- flyte/extras/_container.py +263 -0
- flyte/io/__init__.py +27 -0
- flyte/io/_dir.py +448 -0
- flyte/io/_file.py +467 -0
- flyte/io/_structured_dataset/__init__.py +129 -0
- flyte/io/_structured_dataset/basic_dfs.py +219 -0
- flyte/io/_structured_dataset/structured_dataset.py +1061 -0
- flyte/models.py +391 -0
- flyte/remote/__init__.py +26 -0
- flyte/remote/_client/__init__.py +0 -0
- flyte/remote/_client/_protocols.py +133 -0
- flyte/remote/_client/auth/__init__.py +12 -0
- flyte/remote/_client/auth/_auth_utils.py +14 -0
- flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
- flyte/remote/_client/auth/_authenticators/base.py +397 -0
- flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
- flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
- flyte/remote/_client/auth/_authenticators/factory.py +200 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
- flyte/remote/_client/auth/_channel.py +215 -0
- flyte/remote/_client/auth/_client_config.py +83 -0
- flyte/remote/_client/auth/_default_html.py +32 -0
- flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
- flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
- flyte/remote/_client/auth/_keyring.py +143 -0
- flyte/remote/_client/auth/_token_client.py +260 -0
- flyte/remote/_client/auth/errors.py +16 -0
- flyte/remote/_client/controlplane.py +95 -0
- flyte/remote/_console.py +18 -0
- flyte/remote/_data.py +159 -0
- flyte/remote/_logs.py +176 -0
- flyte/remote/_project.py +85 -0
- flyte/remote/_run.py +970 -0
- flyte/remote/_secret.py +132 -0
- flyte/remote/_task.py +391 -0
- flyte/report/__init__.py +3 -0
- flyte/report/_report.py +178 -0
- flyte/report/_template.html +124 -0
- flyte/storage/__init__.py +29 -0
- flyte/storage/_config.py +233 -0
- flyte/storage/_remote_fs.py +34 -0
- flyte/storage/_storage.py +271 -0
- flyte/storage/_utils.py +5 -0
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +371 -0
- flyte/types/__init__.py +36 -0
- flyte/types/_interface.py +40 -0
- flyte/types/_pickle.py +118 -0
- flyte/types/_renderer.py +162 -0
- flyte/types/_string_literals.py +120 -0
- flyte/types/_type_engine.py +2287 -0
- flyte/types/_utils.py +80 -0
- flyte-0.2.0a0.dist-info/METADATA +249 -0
- flyte-0.2.0a0.dist-info/RECORD +218 -0
- {flyte-0.1.0.dist-info → flyte-0.2.0a0.dist-info}/WHEEL +2 -1
- flyte-0.2.0a0.dist-info/entry_points.txt +3 -0
- flyte-0.2.0a0.dist-info/top_level.txt +1 -0
- flyte-0.1.0.dist-info/METADATA +0 -6
- flyte-0.1.0.dist-info/RECORD +0 -5
flyte/config/_reader.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pathlib
|
|
3
|
+
import typing
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
from os import getenv
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
from flyte._logging import logger
|
|
12
|
+
|
|
13
|
+
# Names of the environment variables that may hold the path to the config file
|
|
14
|
+
FLYTECTL_CONFIG_ENV_VAR = "FLYTECTL_CONFIG"
|
|
15
|
+
UCTL_CONFIG_ENV_VAR = "UCTL_CONFIG"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class YamlConfigEntry(object):
    """
    A single configuration setting that can be looked up in the environment or in a YAML config file.

    Args:
        switch: dot-delimited key (e.g. ``section.option``). It is kept as one dot-delimited string,
            rather than a list of strings, to stay aligned with the flytectl switches.
        config_value_type: Expected type of the value
    """

    switch: str
    config_value_type: typing.Type = str

    def get_env_name(self) -> str:
        """
        Build the environment variable name for this entry: the switch is upper-cased, dots become
        underscores and the result is prefixed with ``FLYTE_``.

        :return: The environment variable name
        """
        return "FLYTE_" + self.switch.upper().replace(".", "_")

    def read_from_env(self, transform: typing.Optional[typing.Callable] = None) -> typing.Optional[typing.Any]:
        """
        Look the entry up in the process environment under the name given by ``get_env_name``.

        :param transform: Optional callable applied to the raw string before it is returned
        :return: The (optionally transformed) value, or None when the variable is not set
        """
        raw = os.environ.get(self.get_env_name(), None)
        if raw is None:
            return None
        if transform:
            return transform(raw)
        return raw

    def read_from_file(
        self, cfg: "ConfigFile", transform: typing.Optional[typing.Callable] = None
    ) -> typing.Optional[typing.Any]:
        """
        Look the entry up in a loaded config file.

        Booleans are returned as-is (so an explicit ``False`` survives); any other falsy value is treated
        as missing. Lookup is best effort: every exception raised while fetching or transforming the
        value is swallowed and reported as None.

        :param cfg: The config file to query; a falsy value short-circuits to None
        :param transform: Optional callable applied to the value before it is returned
        :return: The (optionally transformed) value, or None
        """
        if not cfg:
            return None
        try:
            value = cfg.get(self)
            usable = isinstance(value, bool) or (value is not None and bool(value))
            if usable:
                if transform:
                    return transform(value)
                return value
        except Exception:
            # Deliberate best effort: a broken lookup counts as "not configured"
            pass

        return None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class ConfigEntry(object):
    """
    Top level holder for one config setting: a YAML entry plus an optional transform that is applied
    to whatever raw value is found.
    """

    yaml_entry: YamlConfigEntry
    transform: typing.Optional[typing.Callable[[str], typing.Any]] = None

    def read(self, cfg: typing.Optional["ConfigFile"] = None) -> typing.Optional[typing.Any]:
        """
        Resolve the setting from the available sources, in this order:

        #. The environment variable derived from the YAML entry.
        #. The YAML config file, when one was passed and it holds parsed content.

        :param cfg: Optional loaded config file
        :return: The resolved (and transformed) value, or None when no source provides one
        """
        env_value = self.yaml_entry.read_from_env(self.transform)
        if env_value is not None:
            return env_value

        # Only consult the file when there is a file, it parsed to something, and an entry exists
        if not (cfg and cfg.yaml_config and self.yaml_entry):
            return None
        return self.yaml_entry.read_from_file(cfg, self.transform)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class ConfigFile(object):
|
|
95
|
+
def __init__(self, location: str):
|
|
96
|
+
"""
|
|
97
|
+
Load the config from this location
|
|
98
|
+
"""
|
|
99
|
+
self._location = location
|
|
100
|
+
self._yaml_config = self._read_yaml_config(location)
|
|
101
|
+
|
|
102
|
+
@property
|
|
103
|
+
def path(self) -> pathlib.Path:
|
|
104
|
+
"""
|
|
105
|
+
Returns the path to the config file.
|
|
106
|
+
:return: Path to the config file
|
|
107
|
+
"""
|
|
108
|
+
return pathlib.Path(self._location)
|
|
109
|
+
|
|
110
|
+
@staticmethod
|
|
111
|
+
def _read_yaml_config(location: str) -> typing.Optional[typing.Dict[str, typing.Any]]:
|
|
112
|
+
with open(location, "r") as fh:
|
|
113
|
+
try:
|
|
114
|
+
yaml_contents = yaml.safe_load(fh)
|
|
115
|
+
return yaml_contents
|
|
116
|
+
except yaml.YAMLError as exc:
|
|
117
|
+
logger.warning(f"Error {exc} reading yaml config file at {location}, ignoring...")
|
|
118
|
+
return None
|
|
119
|
+
|
|
120
|
+
def _get_from_yaml(self, c: YamlConfigEntry) -> typing.Any:
|
|
121
|
+
keys = c.switch.split(".") # flytectl switches are dot delimited
|
|
122
|
+
d = typing.cast(typing.Dict[str, typing.Any], self.yaml_config)
|
|
123
|
+
try:
|
|
124
|
+
for k in keys:
|
|
125
|
+
d = d[k]
|
|
126
|
+
return d
|
|
127
|
+
except KeyError:
|
|
128
|
+
return None
|
|
129
|
+
|
|
130
|
+
def get(self, c: YamlConfigEntry) -> typing.Any:
|
|
131
|
+
return self._get_from_yaml(c)
|
|
132
|
+
|
|
133
|
+
@property
|
|
134
|
+
def yaml_config(self) -> typing.Dict[str, typing.Any] | None:
|
|
135
|
+
return self._yaml_config
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def resolve_config_path() -> pathlib.Path | None:
    """
    Config is read from the following locations in order of precedence:
    1. ./config.yaml if it exists
    2. `UCTL_CONFIG` environment variable
    3. `FLYTECTL_CONFIG` environment variable
    4. ~/.union/config.yaml if it exists
    5. ~/.flyte/config.yaml if it exists

    Paths taken from the environment variables are returned as given; their existence is not checked.

    :return: The first matching path, or None when no location applies
    """
    current_location_config = Path("config.yaml")
    if current_location_config.exists():
        return current_location_config
    logger.debug("No ./config.yaml found, checking UCTL_CONFIG environment variable")

    uctl_path_from_env = getenv(UCTL_CONFIG_ENV_VAR, None)
    if uctl_path_from_env:
        return pathlib.Path(uctl_path_from_env)
    logger.debug("No UCTL_CONFIG environment variable found, checking FLYTECTL_CONFIG")

    flytectl_path_from_env = getenv(FLYTECTL_CONFIG_ENV_VAR, None)
    if flytectl_path_from_env:
        return pathlib.Path(flytectl_path_from_env)
    logger.debug("No FLYTECTL_CONFIG environment variable found, checking default locations")

    home_dir_union_config = Path(Path.home(), ".union", "config.yaml")
    if home_dir_union_config.exists():
        return home_dir_union_config
    logger.debug("No ~/.union/config.yaml found, checking ~/.flyte/config.yaml")

    home_dir_flytectl_config = Path(Path.home(), ".flyte", "config.yaml")
    if home_dir_flytectl_config.exists():
        return home_dir_flytectl_config
    logger.debug("No ~/.flyte/config.yaml found, no config file could be resolved")

    return None
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@lru_cache
def get_config_file(c: typing.Union[str, ConfigFile, None]) -> ConfigFile | None:
    """
    Turn the argument into a loaded ConfigFile where possible.

    A string is treated as a file path and loaded, an existing ConfigFile is handed back untouched,
    and anything else falls back to the location found by ``resolve_config_path``. When the
    ``PYTEST_VERSION`` environment variable is present, None is always returned. Results are memoized
    per argument by ``lru_cache``.

    :param c: A config file path, an already loaded ConfigFile, or None
    :return: The loaded ConfigFile, or None when no config file applies
    """
    if "PYTEST_VERSION" in os.environ:
        # Use default local config in the pytest environment
        return None

    if isinstance(c, str):
        logger.debug(f"Using specified config file at {c}")
        return ConfigFile(c)
    if isinstance(c, ConfigFile):
        return c

    config_path = resolve_config_path()
    return ConfigFile(str(config_path)) if config_path else None
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def read_file_if_exists(filename: typing.Optional[str], encoding=None) -> typing.Optional[str]:
    """
    Return the text of ``filename`` when a non-empty path is given; otherwise return None.

    The file's existence is not checked first, so errors raised while reading propagate to the caller.

    :param filename: The file path to load
    :param encoding: The encoding to use when reading the file.
    :return: The contents of the file as a string or None.
    """
    if filename:
        path = pathlib.Path(filename)
        logger.debug(f"Reading file contents from [{path}] with current directory [{os.getcwd()}].")
        return path.read_text(encoding=encoding)
    return None
|
|
File without changes
|
flyte/errors.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Exceptions raised by Union.
|
|
3
|
+
|
|
4
|
+
These errors are raised when the underlying task execution fails, either because of a user error, system error or an
|
|
5
|
+
unknown error.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Literal
|
|
9
|
+
|
|
10
|
+
ErrorKind = Literal["system", "unknown", "user"]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BaseRuntimeError(RuntimeError):
|
|
14
|
+
"""
|
|
15
|
+
Base class for all Union runtime errors. These errors are raised when the underlying task execution fails, either
|
|
16
|
+
because of a user error, system error or an unknown error.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def __init__(self, code: str, kind: ErrorKind, root_cause_message: str, worker: str | None = None):
|
|
20
|
+
super().__init__(root_cause_message)
|
|
21
|
+
self.code = code
|
|
22
|
+
self.kind = kind
|
|
23
|
+
self.worker = worker
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class InitializationError(BaseRuntimeError):
|
|
27
|
+
"""
|
|
28
|
+
This error is raised when the Union system is tried to access without being initialized.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class RuntimeSystemError(BaseRuntimeError):
|
|
33
|
+
"""
|
|
34
|
+
This error is raised when the underlying task execution fails because of a system error. This could be a bug in the
|
|
35
|
+
Union system or a bug in the user's code.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def __init__(self, code: str, message: str, worker: str | None = None):
|
|
39
|
+
super().__init__(code, "system", message, worker)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class UnionRpcError(RuntimeSystemError):
|
|
43
|
+
"""
|
|
44
|
+
This error is raised when communication with the Union server fails.
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class RuntimeUserError(BaseRuntimeError):
|
|
49
|
+
"""
|
|
50
|
+
This error is raised when the underlying task execution fails because of an error in the user's code.
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
def __init__(self, code: str, message: str, worker: str | None = None):
|
|
54
|
+
super().__init__(code, "user", message, worker)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class RuntimeUnknownError(BaseRuntimeError):
|
|
58
|
+
"""
|
|
59
|
+
This error is raised when the underlying task execution fails because of an unknown error.
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
def __init__(self, code: str, message: str, worker: str | None = None):
|
|
63
|
+
super().__init__(code, "unknown", message, worker)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class OOMError(RuntimeUserError):
|
|
67
|
+
"""
|
|
68
|
+
This error is raised when the underlying task execution fails because of an out-of-memory error.
|
|
69
|
+
"""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class TaskInterruptedError(RuntimeUserError):
|
|
73
|
+
"""
|
|
74
|
+
This error is raised when the underlying task execution is interrupted.
|
|
75
|
+
"""
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class PrimaryContainerNotFoundError(RuntimeUserError):
|
|
79
|
+
"""
|
|
80
|
+
This error is raised when the primary container is not found.
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class TaskTimeoutError(RuntimeUserError):
|
|
85
|
+
"""
|
|
86
|
+
This error is raised when the underlying task execution runs for longer than the specified timeout.
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class RetriesExhaustedError(RuntimeUserError):
|
|
91
|
+
"""
|
|
92
|
+
This error is raised when the underlying task execution fails after all retries have been exhausted.
|
|
93
|
+
"""
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class InvalidImageNameError(RuntimeUserError):
|
|
97
|
+
"""
|
|
98
|
+
This error is raised when the image name is invalid.
|
|
99
|
+
"""
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class ImagePullBackOffError(RuntimeUserError):
|
|
103
|
+
"""
|
|
104
|
+
This error is raised when the image cannot be pulled.
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class CustomError(RuntimeUserError):
|
|
109
|
+
"""
|
|
110
|
+
This error is raised when the user raises a custom error.
|
|
111
|
+
"""
|
|
112
|
+
|
|
113
|
+
def __init__(self, code: str, message: str):
|
|
114
|
+
super().__init__(code, message, "user")
|
|
115
|
+
|
|
116
|
+
@classmethod
|
|
117
|
+
def from_exception(cls, e: Exception):
|
|
118
|
+
"""
|
|
119
|
+
Create a CustomError from an exception. The exception's class name is used as the error code and the exception
|
|
120
|
+
message is used as the error message.
|
|
121
|
+
"""
|
|
122
|
+
return cls(e.__class__.__name__, str(e))
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class NotInTaskContextError(RuntimeUserError):
|
|
126
|
+
"""
|
|
127
|
+
This error is raised when the user tries to access the task context outside of a task.
|
|
128
|
+
"""
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class ActionNotFoundError(RuntimeError):
|
|
132
|
+
"""
|
|
133
|
+
This error is raised when the user tries to access an action that does not exist.
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class ReferenceTaskError(RuntimeUserError):
|
|
138
|
+
"""
|
|
139
|
+
This error is raised when the user tries to access a task that does not exist.
|
|
140
|
+
"""
|
|
141
|
+
|
|
142
|
+
def __init__(self, message: str):
|
|
143
|
+
super().__init__("ReferenceTaskUsageError", message, "user")
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class LogsNotYetAvailableError(BaseRuntimeError):
|
|
147
|
+
"""
|
|
148
|
+
This error is raised when the logs are not yet available for a task.
|
|
149
|
+
"""
|
|
150
|
+
|
|
151
|
+
def __init__(self, message: str):
|
|
152
|
+
super().__init__("LogsNotYetAvailable", "system", message, None)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class RuntimeDataValidationError(RuntimeUserError):
|
|
156
|
+
"""
|
|
157
|
+
This error is raised when the user tries to access a resource that does not exist or is invalid.
|
|
158
|
+
"""
|
|
159
|
+
|
|
160
|
+
def __init__(self, var: str, e: Exception, task_name: str = ""):
|
|
161
|
+
super().__init__(
|
|
162
|
+
"DataValiationError", f"In task {task_name} variable {var}, failed to serialize/deserialize because {e}"
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class DeploymentError(RuntimeUserError):
|
|
167
|
+
"""
|
|
168
|
+
This error is raised when the deployment of a task fails, or some preconditions for deployment are not met.
|
|
169
|
+
"""
|
|
170
|
+
|
|
171
|
+
def __init__(self, message: str):
|
|
172
|
+
super().__init__("DeploymentError", message, "user")
|
flyte/extras/__init__.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pathlib
|
|
3
|
+
from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union
|
|
4
|
+
|
|
5
|
+
from flyteidl.core import tasks_pb2
|
|
6
|
+
|
|
7
|
+
from flyte import Image, storage
|
|
8
|
+
from flyte._logging import logger
|
|
9
|
+
from flyte._task import TaskTemplate
|
|
10
|
+
from flyte.models import NativeInterface, SerializationContext
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _extract_command_key(cmd: str, **kwargs) -> List[Any] | None:
|
|
14
|
+
"""
|
|
15
|
+
Extract the key from the command using regex.
|
|
16
|
+
"""
|
|
17
|
+
import re
|
|
18
|
+
|
|
19
|
+
input_regex = r"\{\{\.inputs\.([a-zA-Z0-9_]+)\}\}"
|
|
20
|
+
return re.findall(input_regex, cmd)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _extract_path_command_key(cmd: str, input_data_dir: Optional[str]) -> Optional[str]:
|
|
24
|
+
"""
|
|
25
|
+
Extract the key from the path-like command using regex.
|
|
26
|
+
"""
|
|
27
|
+
import re
|
|
28
|
+
|
|
29
|
+
input_data_dir = input_data_dir or ""
|
|
30
|
+
input_regex = rf"{re.escape(input_data_dir)}/(.+)$"
|
|
31
|
+
match = re.match(input_regex, cmd)
|
|
32
|
+
if match:
|
|
33
|
+
return match.group(1)
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ContainerTask(TaskTemplate):
|
|
38
|
+
"""
|
|
39
|
+
This is an intermediate class that represents Flyte Tasks that run a container at execution time. This is the vast
|
|
40
|
+
majority of tasks - the typical ``@task`` decorated tasks; for instance, all run a container. An example of
|
|
41
|
+
something that doesn't run a container would be something like the Athena SQL task.
|
|
42
|
+
|
|
43
|
+
:param name: Name of the task
|
|
44
|
+
:param image: The container image to use for the task. This can be a string or an Image object.
|
|
45
|
+
:param command: The command to run in the container. This can be a list of strings or a single string.
|
|
46
|
+
:param inputs: The inputs to the task. This is a dictionary of input names to types.
|
|
47
|
+
:param arguments: The arguments to pass to the command. This is a list of strings.
|
|
48
|
+
:param outputs: The outputs of the task. This is a dictionary of output names to types.
|
|
49
|
+
:param input_data_dir: The directory where the input data is stored. This is a string or a Path object.
|
|
50
|
+
:param output_data_dir: The directory where the output data is stored. This is a string or a Path object.
|
|
51
|
+
:param metadata_format: The format of the output file. This can be "JSON", "YAML", or "PROTO".
|
|
52
|
+
:param local_logs: If True, logs will be printed to the console in the local execution.
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
MetadataFormat = Literal["JSON", "YAML", "PROTO"]
|
|
56
|
+
|
|
57
|
+
def __init__(
    self,
    name: str,
    image: Union[str, Image],
    command: List[str],
    inputs: Optional[Dict[str, Type]] = None,
    arguments: Optional[List[str]] = None,
    outputs: Optional[Dict[str, Type]] = None,
    input_data_dir: str | pathlib.Path = "/var/inputs",
    output_data_dir: str | pathlib.Path = "/var/outputs",
    metadata_format: MetadataFormat = "JSON",
    local_logs: bool = True,
    **kwargs,
):
    """
    Set up the raw-container task: initialise the base task template, then normalise the image and
    the data directories and store the remaining settings on the instance.
    """
    # Each declared input maps to (type, None) in the native interface
    native_inputs = {key: (tp, None) for key, tp in inputs.items()} if inputs else {}
    super().__init__(
        task_type="raw-container",
        name=name,
        image=image,
        interface=NativeInterface(native_inputs, outputs or {}),
        **kwargs,
    )

    # A string image is either the special value "auto" or the name of a prebuilt image
    if not isinstance(image, str):
        self._image = image
    elif image == "auto":
        self._image = Image.auto()
    else:
        self._image = Image.from_prebuilt(image)

    self._cmd = command
    self._args = arguments

    # Directories given as strings are stored as Path objects
    self._input_data_dir = pathlib.Path(input_data_dir) if isinstance(input_data_dir, str) else input_data_dir
    self._output_data_dir = pathlib.Path(output_data_dir) if isinstance(output_data_dir, str) else output_data_dir

    self._metadata_format = metadata_format
    self._inputs = inputs
    self._outputs = outputs
    self.local_logs = local_logs
|
|
96
|
+
|
|
97
|
+
def _render_command_and_volume_binding(self, cmd: str, **kwargs) -> Tuple[str, Dict[str, Dict[str, str]]]:
    """
    We support template-style references to inputs, e.g., "{{.inputs.infile}}".

    For FlyteFile and FlyteDirectory commands, e.g., "/var/inputs/inputs", we extract the key from strings that
    begin with the specified `input_data_dir`.

    :param cmd: A single command or argument string
    :param kwargs: Task inputs by name
    :return: The command to use (the in-container path for File/Dir inputs, otherwise ``cmd`` unchanged)
        and the volume bindings required for it
    :raises AssertionError: If a File/Dir input is referenced through the template syntax
    """
    from flyte.io import Dir, File

    volume_binding: Dict[str, Dict[str, str]] = {}
    path_k = _extract_path_command_key(cmd, str(self._input_data_dir))
    # path_k is a single key (a string). It must be wrapped in a list: iterating the bare string
    # would walk its characters, so the input would never be found in kwargs.
    keys = [path_k] if path_k else _extract_command_key(cmd)

    command = cmd
    for k in keys or []:
        input_val = kwargs.get(k)
        # TODO: Add support file and directory transformer first
        if type(input_val) in [File, Dir]:
            if not path_k:
                raise AssertionError(
                    "File and Directory commands should not use the template syntax "
                    "like this: {{.inputs.infile}}\n"
                    "Please use a path-like syntax, such as: /var/inputs/infile.\n"
                    "This requirement is due to how Flyte Propeller processes template syntax inputs."
                )
            local_flyte_file_or_dir_path = str(input_val)
            remote_flyte_file_or_dir_path = os.path.join(self._input_data_dir, k)  # type: ignore
            volume_binding[local_flyte_file_or_dir_path] = {
                "bind": remote_flyte_file_or_dir_path,
                "mode": "rw",
            }
            command = remote_flyte_file_or_dir_path
        else:
            command = cmd

    return command, volume_binding
|
|
133
|
+
|
|
134
|
+
def _prepare_command_and_volumes(
|
|
135
|
+
self, cmd_and_args: List[str], **kwargs
|
|
136
|
+
) -> Tuple[List[str], Dict[str, Dict[str, str]]]:
|
|
137
|
+
"""
|
|
138
|
+
Prepares the command and volume bindings for the container based on input arguments and command templates.
|
|
139
|
+
|
|
140
|
+
Parameters:
|
|
141
|
+
- cmd_and_args (List[str]): The command and arguments to prepare.
|
|
142
|
+
- **kwargs: Keyword arguments representing task inputs.
|
|
143
|
+
|
|
144
|
+
Returns:
|
|
145
|
+
- Tuple[List[str], Dict[str, Dict[str, str]]]: A tuple containing the prepared commands and volume bindings.
|
|
146
|
+
"""
|
|
147
|
+
|
|
148
|
+
commands = []
|
|
149
|
+
volume_bindings = {}
|
|
150
|
+
|
|
151
|
+
for cmd in cmd_and_args:
|
|
152
|
+
command, volume_binding = self._render_command_and_volume_binding(cmd, **kwargs)
|
|
153
|
+
commands.append(command)
|
|
154
|
+
volume_bindings.update(volume_binding)
|
|
155
|
+
|
|
156
|
+
return commands, volume_bindings
|
|
157
|
+
|
|
158
|
+
def _pull_image_if_not_exists(self, client, image: str):
    """
    Pull ``image`` through ``client`` unless a matching image is already listed locally.

    Parameters:
    - client: Object exposing ``images.list`` and ``images.pull`` (the caller passes a Docker client).
    - image (str): Image reference to look up and, if missing, pull.

    Raises:
    - Exception: Any error raised while listing or pulling is logged and re-raised unchanged.
    """
    try:
        matching_images = client.images.list(filters={"reference": image})
        if matching_images:
            # Image is already available locally; nothing to pull.
            return
        logger.info(f"Pulling image: {image} for container task: {self.name}")
        client.images.pull(image)
    except Exception as e:
        logger.error(f"Failed to pull image {image}: {e!s}")
        raise
|
|
166
|
+
|
|
167
|
+
def _string_to_timedelta(self, s: str):
|
|
168
|
+
import datetime
|
|
169
|
+
import re
|
|
170
|
+
|
|
171
|
+
regex = r"(?:(\d+) days?, )?(?:(\d+):)?(\d+):(\d+)(?:\.(\d+))?"
|
|
172
|
+
parts = re.match(regex, s)
|
|
173
|
+
if not parts:
|
|
174
|
+
raise ValueError("Invalid timedelta string format")
|
|
175
|
+
|
|
176
|
+
days = int(parts.group(1)) if parts.group(1) else 0
|
|
177
|
+
hours = int(parts.group(2)) if parts.group(2) else 0
|
|
178
|
+
minutes = int(parts.group(3)) if parts.group(3) else 0
|
|
179
|
+
seconds = int(parts.group(4)) if parts.group(4) else 0
|
|
180
|
+
microseconds = int(parts.group(5)) if parts.group(5) else 0
|
|
181
|
+
|
|
182
|
+
return datetime.timedelta(
|
|
183
|
+
days=days,
|
|
184
|
+
hours=hours,
|
|
185
|
+
minutes=minutes,
|
|
186
|
+
seconds=seconds,
|
|
187
|
+
microseconds=microseconds,
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
def _convert_output_val_to_correct_type(self, output_val: Any, output_type: Type) -> Any:
|
|
191
|
+
import datetime
|
|
192
|
+
|
|
193
|
+
if issubclass(output_type, bool):
|
|
194
|
+
return output_val.lower() != "false"
|
|
195
|
+
elif issubclass(output_type, datetime.datetime):
|
|
196
|
+
return datetime.datetime.fromisoformat(output_val)
|
|
197
|
+
elif issubclass(output_type, datetime.timedelta):
|
|
198
|
+
return self._string_to_timedelta(output_val)
|
|
199
|
+
else:
|
|
200
|
+
return output_type(output_val)
|
|
201
|
+
|
|
202
|
+
def _get_output_dict(self, output_directory: pathlib.Path) -> Dict[str, Any]:
|
|
203
|
+
output_dict = {}
|
|
204
|
+
if self._outputs:
|
|
205
|
+
for k, output_type in self._outputs.items():
|
|
206
|
+
output_path = output_directory / k
|
|
207
|
+
with output_path.open("r") as f:
|
|
208
|
+
output_val = f.read()
|
|
209
|
+
output_dict[k] = self._convert_output_val_to_correct_type(output_val, output_type)
|
|
210
|
+
return output_dict
|
|
211
|
+
|
|
212
|
+
async def execute(self, **kwargs) -> Any:
    """
    Run the task locally in a Docker container and return its outputs.

    The command and arguments are rendered against the task inputs, a fresh local
    directory is bind-mounted at the configured output path, the image is pulled when it
    is not available locally, and the container is started detached. After the container
    finishes, the declared outputs are read back from the mounted directory.

    Parameters:
    - **kwargs: Keyword arguments representing task inputs.

    Returns:
    - Any: A dict mapping each declared output name to its converted value.

    Raises:
    - ImportError: If the ``docker`` Python package cannot be imported.
    - AssertionError: If the task image is a plain string instead of an Image object.
    """
    try:
        import docker
    except ImportError as err:
        # Chain the original error so the real import failure stays visible in the traceback.
        raise ImportError(
            "Docker is not installed. Please install Docker by running `pip install docker`."
        ) from err

    # Normalize the input and output directories
    self._input_data_dir = os.path.normpath(self._input_data_dir) if self._input_data_dir else ""
    self._output_data_dir = os.path.normpath(self._output_data_dir) if self._output_data_dir else ""

    output_directory = storage.get_random_local_directory()
    cmd_and_args = (self._cmd or []) + (self._args or [])
    commands, volume_bindings = self._prepare_command_and_volumes(cmd_and_args, **kwargs)
    # Mount the local scratch directory where the container is expected to write its outputs.
    volume_bindings[str(output_directory)] = {"bind": self._output_data_dir, "mode": "rw"}

    client = docker.from_env()
    if isinstance(self._image, str):
        raise AssertionError(f"Only Image objects are supported, not strings. Got {self._image} instead.")
    uri = self._image.uri
    self._pull_image_if_not_exists(client, uri)
    print(f"Command: {commands!r}")

    container = client.containers.run(uri, command=commands, remove=True, volumes=volume_bindings, detach=True)

    # Wait for the container to finish the task
    # TODO: Add a 'timeout' parameter to control the max wait time for the container to finish the task.
    # NOTE(review): the Docker calls below look synchronous, so they presumably block the
    # event loop while this coroutine runs - confirm whether that is acceptable for local runs.

    if self.local_logs:
        for log in container.logs(stream=True):
            print(f"[Local Container] {log.strip()!r}")

    # NOTE(review): the result of wait() is discarded, so a failing container is only noticed
    # later if an expected output file is missing - consider checking the exit status.
    container.wait()

    output_dict = self._get_output_dict(output_directory)
    return output_dict
|
|
247
|
+
|
|
248
|
+
def data_loading_config(self, sctx: SerializationContext) -> tasks_pb2.DataLoadingConfig:
    """
    Build the ``DataLoadingConfig`` message describing where inputs and outputs are exchanged.

    Parameters:
    - sctx (SerializationContext): Serialization context; not used by this method.

    Returns:
    - tasks_pb2.DataLoadingConfig: Enabled config carrying the input/output paths and the
      metadata format. Formats other than "JSON", "YAML" and "PROTO" fall back to JSON.
    """
    json_format = tasks_pb2.DataLoadingConfig.JSON
    literal_to_protobuf = {
        "JSON": json_format,
        "YAML": tasks_pb2.DataLoadingConfig.YAML,
        "PROTO": tasks_pb2.DataLoadingConfig.PROTO,
    }

    return tasks_pb2.DataLoadingConfig(
        input_path=str(self._input_data_dir),
        output_path=str(self._output_data_dir),
        enabled=True,
        # Fall back to the enum constant (not the bare name) so the lookup always yields one type.
        format=literal_to_protobuf.get(self._metadata_format, json_format),
    )
|
|
261
|
+
|
|
262
|
+
def container_args(self, sctx: SerializationContext) -> List[str]:
    """
    Return the container command followed by its arguments as one list.

    Parameters:
    - sctx (SerializationContext): Serialization context; not used by this method.

    Returns:
    - List[str]: ``self._cmd`` followed by ``self._args``; an unset (``None``/empty) part
      contributes nothing.
    """
    # Treat an unset command like execute() does, instead of failing on ``None + list``.
    return (self._cmd or []) + (self._args or [])
|
flyte/io/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""
|
|
2
|
+
## IO data types
|
|
3
|
+
|
|
4
|
+
This package contains additional data types beyond the primitive data types in Python to abstract data flow
|
|
5
|
+
of large datasets in Union.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"Dir",
|
|
11
|
+
"File",
|
|
12
|
+
"StructuredDataset",
|
|
13
|
+
"StructuredDatasetDecoder",
|
|
14
|
+
"StructuredDatasetEncoder",
|
|
15
|
+
"StructuredDatasetTransformerEngine",
|
|
16
|
+
"lazy_import_structured_dataset_handler",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
from ._dir import Dir
|
|
20
|
+
from ._file import File
|
|
21
|
+
from ._structured_dataset import (
|
|
22
|
+
StructuredDataset,
|
|
23
|
+
StructuredDatasetDecoder,
|
|
24
|
+
StructuredDatasetEncoder,
|
|
25
|
+
StructuredDatasetTransformerEngine,
|
|
26
|
+
lazy_import_structured_dataset_handler,
|
|
27
|
+
)
|