flyte 0.2.0b1__py3-none-any.whl → 2.0.0b46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flyte/__init__.py +83 -30
- flyte/_bin/connect.py +61 -0
- flyte/_bin/debug.py +38 -0
- flyte/_bin/runtime.py +87 -19
- flyte/_bin/serve.py +351 -0
- flyte/_build.py +3 -2
- flyte/_cache/cache.py +6 -5
- flyte/_cache/local_cache.py +216 -0
- flyte/_code_bundle/_ignore.py +31 -5
- flyte/_code_bundle/_packaging.py +42 -11
- flyte/_code_bundle/_utils.py +57 -34
- flyte/_code_bundle/bundle.py +130 -27
- flyte/_constants.py +1 -0
- flyte/_context.py +21 -5
- flyte/_custom_context.py +73 -0
- flyte/_debug/constants.py +37 -0
- flyte/_debug/utils.py +17 -0
- flyte/_debug/vscode.py +315 -0
- flyte/_deploy.py +396 -75
- flyte/_deployer.py +109 -0
- flyte/_environment.py +94 -11
- flyte/_excepthook.py +37 -0
- flyte/_group.py +2 -1
- flyte/_hash.py +1 -16
- flyte/_image.py +544 -231
- flyte/_initialize.py +456 -316
- flyte/_interface.py +40 -5
- flyte/_internal/controllers/__init__.py +22 -8
- flyte/_internal/controllers/_local_controller.py +159 -35
- flyte/_internal/controllers/_trace.py +18 -10
- flyte/_internal/controllers/remote/__init__.py +38 -9
- flyte/_internal/controllers/remote/_action.py +82 -12
- flyte/_internal/controllers/remote/_client.py +6 -2
- flyte/_internal/controllers/remote/_controller.py +290 -64
- flyte/_internal/controllers/remote/_core.py +155 -95
- flyte/_internal/controllers/remote/_informer.py +40 -20
- flyte/_internal/controllers/remote/_service_protocol.py +2 -2
- flyte/_internal/imagebuild/__init__.py +2 -10
- flyte/_internal/imagebuild/docker_builder.py +391 -84
- flyte/_internal/imagebuild/image_builder.py +111 -55
- flyte/_internal/imagebuild/remote_builder.py +409 -0
- flyte/_internal/imagebuild/utils.py +79 -0
- flyte/_internal/resolvers/_app_env_module.py +92 -0
- flyte/_internal/resolvers/_task_module.py +5 -38
- flyte/_internal/resolvers/app_env.py +26 -0
- flyte/_internal/resolvers/common.py +8 -1
- flyte/_internal/resolvers/default.py +2 -2
- flyte/_internal/runtime/convert.py +319 -36
- flyte/_internal/runtime/entrypoints.py +106 -18
- flyte/_internal/runtime/io.py +71 -23
- flyte/_internal/runtime/resources_serde.py +21 -7
- flyte/_internal/runtime/reuse.py +125 -0
- flyte/_internal/runtime/rusty.py +196 -0
- flyte/_internal/runtime/task_serde.py +239 -66
- flyte/_internal/runtime/taskrunner.py +48 -8
- flyte/_internal/runtime/trigger_serde.py +162 -0
- flyte/_internal/runtime/types_serde.py +7 -16
- flyte/_keyring/file.py +115 -0
- flyte/_link.py +30 -0
- flyte/_logging.py +241 -42
- flyte/_map.py +312 -0
- flyte/_metrics.py +59 -0
- flyte/_module.py +74 -0
- flyte/_pod.py +30 -0
- flyte/_resources.py +296 -33
- flyte/_retry.py +1 -7
- flyte/_reusable_environment.py +72 -7
- flyte/_run.py +462 -132
- flyte/_secret.py +47 -11
- flyte/_serve.py +333 -0
- flyte/_task.py +245 -56
- flyte/_task_environment.py +219 -97
- flyte/_task_plugins.py +47 -0
- flyte/_tools.py +8 -8
- flyte/_trace.py +15 -24
- flyte/_trigger.py +1027 -0
- flyte/_utils/__init__.py +12 -1
- flyte/_utils/asyn.py +3 -1
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +5 -4
- flyte/_utils/description_parser.py +19 -0
- flyte/_utils/docker_credentials.py +173 -0
- flyte/_utils/helpers.py +45 -19
- flyte/_utils/module_loader.py +123 -0
- flyte/_utils/org_discovery.py +57 -0
- flyte/_utils/uv_script_parser.py +8 -1
- flyte/_version.py +16 -3
- flyte/app/__init__.py +27 -0
- flyte/app/_app_environment.py +362 -0
- flyte/app/_connector_environment.py +40 -0
- flyte/app/_deploy.py +130 -0
- flyte/app/_parameter.py +343 -0
- flyte/app/_runtime/__init__.py +3 -0
- flyte/app/_runtime/app_serde.py +383 -0
- flyte/app/_types.py +113 -0
- flyte/app/extras/__init__.py +9 -0
- flyte/app/extras/_auth_middleware.py +217 -0
- flyte/app/extras/_fastapi.py +93 -0
- flyte/app/extras/_model_loader/__init__.py +3 -0
- flyte/app/extras/_model_loader/config.py +7 -0
- flyte/app/extras/_model_loader/loader.py +288 -0
- flyte/cli/__init__.py +12 -0
- flyte/cli/_abort.py +28 -0
- flyte/cli/_build.py +114 -0
- flyte/cli/_common.py +493 -0
- flyte/cli/_create.py +371 -0
- flyte/cli/_delete.py +45 -0
- flyte/cli/_deploy.py +401 -0
- flyte/cli/_gen.py +316 -0
- flyte/cli/_get.py +446 -0
- flyte/cli/_option.py +33 -0
- flyte/{_cli → cli}/_params.py +57 -17
- flyte/cli/_plugins.py +209 -0
- flyte/cli/_prefetch.py +292 -0
- flyte/cli/_run.py +690 -0
- flyte/cli/_serve.py +338 -0
- flyte/cli/_update.py +86 -0
- flyte/cli/_user.py +20 -0
- flyte/cli/main.py +246 -0
- flyte/config/__init__.py +2 -167
- flyte/config/_config.py +215 -163
- flyte/config/_internal.py +10 -1
- flyte/config/_reader.py +225 -0
- flyte/connectors/__init__.py +11 -0
- flyte/connectors/_connector.py +330 -0
- flyte/connectors/_server.py +194 -0
- flyte/connectors/utils.py +159 -0
- flyte/errors.py +134 -2
- flyte/extend.py +24 -0
- flyte/extras/_container.py +69 -56
- flyte/git/__init__.py +3 -0
- flyte/git/_config.py +279 -0
- flyte/io/__init__.py +8 -1
- flyte/io/{structured_dataset → _dataframe}/__init__.py +32 -30
- flyte/io/{structured_dataset → _dataframe}/basic_dfs.py +75 -68
- flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py} +207 -242
- flyte/io/_dir.py +575 -113
- flyte/io/_file.py +587 -141
- flyte/io/_hashing_io.py +342 -0
- flyte/io/extend.py +7 -0
- flyte/models.py +635 -0
- flyte/prefetch/__init__.py +22 -0
- flyte/prefetch/_hf_model.py +563 -0
- flyte/remote/__init__.py +14 -3
- flyte/remote/_action.py +879 -0
- flyte/remote/_app.py +346 -0
- flyte/remote/_auth_metadata.py +42 -0
- flyte/remote/_client/_protocols.py +62 -4
- flyte/remote/_client/auth/_auth_utils.py +19 -0
- flyte/remote/_client/auth/_authenticators/base.py +8 -2
- flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
- flyte/remote/_client/auth/_authenticators/factory.py +4 -0
- flyte/remote/_client/auth/_authenticators/passthrough.py +79 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +17 -18
- flyte/remote/_client/auth/_channel.py +47 -18
- flyte/remote/_client/auth/_client_config.py +5 -3
- flyte/remote/_client/auth/_keyring.py +15 -2
- flyte/remote/_client/auth/_token_client.py +3 -3
- flyte/remote/_client/controlplane.py +206 -18
- flyte/remote/_common.py +66 -0
- flyte/remote/_data.py +107 -22
- flyte/remote/_logs.py +116 -33
- flyte/remote/_project.py +21 -19
- flyte/remote/_run.py +164 -631
- flyte/remote/_secret.py +72 -29
- flyte/remote/_task.py +387 -46
- flyte/remote/_trigger.py +368 -0
- flyte/remote/_user.py +43 -0
- flyte/report/_report.py +10 -6
- flyte/storage/__init__.py +13 -1
- flyte/storage/_config.py +237 -0
- flyte/storage/_parallel_reader.py +289 -0
- flyte/storage/_storage.py +268 -59
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +414 -0
- flyte/types/__init__.py +39 -0
- flyte/types/_interface.py +22 -7
- flyte/{io/pickle/transformer.py → types/_pickle.py} +37 -9
- flyte/types/_string_literals.py +8 -9
- flyte/types/_type_engine.py +226 -126
- flyte/types/_utils.py +1 -1
- flyte-2.0.0b46.data/scripts/debug.py +38 -0
- flyte-2.0.0b46.data/scripts/runtime.py +194 -0
- flyte-2.0.0b46.dist-info/METADATA +352 -0
- flyte-2.0.0b46.dist-info/RECORD +221 -0
- flyte-2.0.0b46.dist-info/entry_points.txt +8 -0
- flyte-2.0.0b46.dist-info/licenses/LICENSE +201 -0
- flyte/_api_commons.py +0 -3
- flyte/_cli/_common.py +0 -299
- flyte/_cli/_create.py +0 -42
- flyte/_cli/_delete.py +0 -23
- flyte/_cli/_deploy.py +0 -140
- flyte/_cli/_get.py +0 -235
- flyte/_cli/_run.py +0 -174
- flyte/_cli/main.py +0 -98
- flyte/_datastructures.py +0 -342
- flyte/_internal/controllers/pbhash.py +0 -39
- flyte/_protos/common/authorization_pb2.py +0 -66
- flyte/_protos/common/authorization_pb2.pyi +0 -108
- flyte/_protos/common/authorization_pb2_grpc.py +0 -4
- flyte/_protos/common/identifier_pb2.py +0 -71
- flyte/_protos/common/identifier_pb2.pyi +0 -82
- flyte/_protos/common/identifier_pb2_grpc.py +0 -4
- flyte/_protos/common/identity_pb2.py +0 -48
- flyte/_protos/common/identity_pb2.pyi +0 -72
- flyte/_protos/common/identity_pb2_grpc.py +0 -4
- flyte/_protos/common/list_pb2.py +0 -36
- flyte/_protos/common/list_pb2.pyi +0 -69
- flyte/_protos/common/list_pb2_grpc.py +0 -4
- flyte/_protos/common/policy_pb2.py +0 -37
- flyte/_protos/common/policy_pb2.pyi +0 -27
- flyte/_protos/common/policy_pb2_grpc.py +0 -4
- flyte/_protos/common/role_pb2.py +0 -37
- flyte/_protos/common/role_pb2.pyi +0 -53
- flyte/_protos/common/role_pb2_grpc.py +0 -4
- flyte/_protos/common/runtime_version_pb2.py +0 -28
- flyte/_protos/common/runtime_version_pb2.pyi +0 -24
- flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
- flyte/_protos/logs/dataplane/payload_pb2.py +0 -96
- flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -168
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
- flyte/_protos/secret/definition_pb2.py +0 -49
- flyte/_protos/secret/definition_pb2.pyi +0 -93
- flyte/_protos/secret/definition_pb2_grpc.py +0 -4
- flyte/_protos/secret/payload_pb2.py +0 -62
- flyte/_protos/secret/payload_pb2.pyi +0 -94
- flyte/_protos/secret/payload_pb2_grpc.py +0 -4
- flyte/_protos/secret/secret_pb2.py +0 -38
- flyte/_protos/secret/secret_pb2.pyi +0 -6
- flyte/_protos/secret/secret_pb2_grpc.py +0 -198
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
- flyte/_protos/validate/validate/validate_pb2.py +0 -76
- flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
- flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
- flyte/_protos/workflow/queue_service_pb2.py +0 -106
- flyte/_protos/workflow/queue_service_pb2.pyi +0 -141
- flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
- flyte/_protos/workflow/run_definition_pb2.py +0 -128
- flyte/_protos/workflow/run_definition_pb2.pyi +0 -310
- flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
- flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
- flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
- flyte/_protos/workflow/run_service_pb2.py +0 -133
- flyte/_protos/workflow/run_service_pb2.pyi +0 -175
- flyte/_protos/workflow/run_service_pb2_grpc.py +0 -412
- flyte/_protos/workflow/state_service_pb2.py +0 -58
- flyte/_protos/workflow/state_service_pb2.pyi +0 -71
- flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
- flyte/_protos/workflow/task_definition_pb2.py +0 -72
- flyte/_protos/workflow/task_definition_pb2.pyi +0 -65
- flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
- flyte/_protos/workflow/task_service_pb2.py +0 -44
- flyte/_protos/workflow/task_service_pb2.pyi +0 -31
- flyte/_protos/workflow/task_service_pb2_grpc.py +0 -104
- flyte/io/_dataframe.py +0 -0
- flyte/io/pickle/__init__.py +0 -0
- flyte/remote/_console.py +0 -18
- flyte-0.2.0b1.dist-info/METADATA +0 -179
- flyte-0.2.0b1.dist-info/RECORD +0 -204
- flyte-0.2.0b1.dist-info/entry_points.txt +0 -3
- /flyte/{_cli → _debug}/__init__.py +0 -0
- /flyte/{_protos → _keyring}/__init__.py +0 -0
- {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/WHEEL +0 -0
- {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/top_level.txt +0 -0
flyte/extras/_container.py
CHANGED
|
@@ -2,27 +2,23 @@ import os
|
|
|
2
2
|
import pathlib
|
|
3
3
|
from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union
|
|
4
4
|
|
|
5
|
-
from
|
|
5
|
+
from flyteidl2.core import tasks_pb2
|
|
6
6
|
|
|
7
7
|
from flyte import Image, storage
|
|
8
|
-
from flyte._datastructures import NativeInterface, SerializationContext
|
|
9
8
|
from flyte._logging import logger
|
|
10
9
|
from flyte._task import TaskTemplate
|
|
10
|
+
from flyte.io import Dir, File
|
|
11
|
+
from flyte.models import NativeInterface, SerializationContext
|
|
11
12
|
|
|
12
|
-
_PRIMARY_CONTAINER_NAME_FIELD = "primary_container_name"
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
def _extract_command_key(cmd: str, **kwargs) -> Any:
|
|
14
|
+
def _extract_command_key(cmd: str, **kwargs) -> List[Any] | None:
|
|
16
15
|
"""
|
|
17
16
|
Extract the key from the command using regex.
|
|
18
17
|
"""
|
|
19
18
|
import re
|
|
20
19
|
|
|
21
|
-
input_regex = r"
|
|
22
|
-
|
|
23
|
-
if match:
|
|
24
|
-
return match.group(1)
|
|
25
|
-
return None
|
|
20
|
+
input_regex = r"\{\{\.inputs\.([a-zA-Z0-9_]+)\}\}"
|
|
21
|
+
return re.findall(input_regex, cmd)
|
|
26
22
|
|
|
27
23
|
|
|
28
24
|
def _extract_path_command_key(cmd: str, input_data_dir: Optional[str]) -> Optional[str]:
|
|
@@ -32,8 +28,9 @@ def _extract_path_command_key(cmd: str, input_data_dir: Optional[str]) -> Option
|
|
|
32
28
|
import re
|
|
33
29
|
|
|
34
30
|
input_data_dir = input_data_dir or ""
|
|
35
|
-
input_regex = rf"{re.escape(input_data_dir)}/(
|
|
36
|
-
|
|
31
|
+
input_regex = rf"{re.escape(input_data_dir)}/([\w\-.]+)" # captures file or dir names
|
|
32
|
+
|
|
33
|
+
match = re.search(input_regex, cmd)
|
|
37
34
|
if match:
|
|
38
35
|
return match.group(1)
|
|
39
36
|
return None
|
|
@@ -70,7 +67,7 @@ class ContainerTask(TaskTemplate):
|
|
|
70
67
|
input_data_dir: str | pathlib.Path = "/var/inputs",
|
|
71
68
|
output_data_dir: str | pathlib.Path = "/var/outputs",
|
|
72
69
|
metadata_format: MetadataFormat = "JSON",
|
|
73
|
-
local_logs: bool =
|
|
70
|
+
local_logs: bool = True,
|
|
74
71
|
**kwargs,
|
|
75
72
|
):
|
|
76
73
|
super().__init__(
|
|
@@ -83,9 +80,14 @@ class ContainerTask(TaskTemplate):
|
|
|
83
80
|
self._image = image
|
|
84
81
|
if isinstance(image, str):
|
|
85
82
|
if image == "auto":
|
|
86
|
-
self._image = Image.
|
|
83
|
+
self._image = Image.from_debian_base()
|
|
87
84
|
else:
|
|
88
|
-
self._image = Image.
|
|
85
|
+
self._image = Image.from_base(image)
|
|
86
|
+
|
|
87
|
+
if command and any(not isinstance(c, str) for c in command):
|
|
88
|
+
raise ValueError("All elements in the command list must be strings.")
|
|
89
|
+
if arguments and any(not isinstance(a, str) for a in arguments):
|
|
90
|
+
raise ValueError("All elements in the arguments list must be strings.")
|
|
89
91
|
self._cmd = command
|
|
90
92
|
self._args = arguments
|
|
91
93
|
self._input_data_dir = input_data_dir
|
|
@@ -106,32 +108,34 @@ class ContainerTask(TaskTemplate):
|
|
|
106
108
|
For FlyteFile and FlyteDirectory commands, e.g., "/var/inputs/inputs", we extract the key from strings that
|
|
107
109
|
begin with the specified `input_data_dir`.
|
|
108
110
|
"""
|
|
109
|
-
|
|
110
|
-
# from flytekit.types.file import FlyteFile
|
|
111
|
+
from flyte.io import Dir, File
|
|
111
112
|
|
|
112
113
|
volume_binding: Dict[str, Dict[str, str]] = {}
|
|
113
114
|
path_k = _extract_path_command_key(cmd, str(self._input_data_dir))
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
115
|
+
keys = [path_k] if path_k else _extract_command_key(cmd)
|
|
116
|
+
|
|
117
|
+
command = cmd
|
|
118
|
+
|
|
119
|
+
if keys:
|
|
120
|
+
for k in keys:
|
|
121
|
+
input_val = kwargs.get(k)
|
|
122
|
+
# TODO: Add support file and directory transformer first
|
|
123
|
+
if input_val and type(input_val) in [File, Dir]:
|
|
124
|
+
if not path_k:
|
|
125
|
+
raise AssertionError(
|
|
126
|
+
"File and Directory commands should not use the template syntax "
|
|
127
|
+
"like this: {{.inputs.infile}}\n"
|
|
128
|
+
"Please use a path-like syntax, such as: /var/inputs/infile.\n"
|
|
129
|
+
"This requirement is due to how Flyte Propeller processes template syntax inputs."
|
|
130
|
+
)
|
|
131
|
+
local_flyte_file_or_dir_path = input_val.path
|
|
132
|
+
remote_flyte_file_or_dir_path = os.path.join(self._input_data_dir, k) # type: ignore
|
|
133
|
+
volume_binding[local_flyte_file_or_dir_path] = {
|
|
134
|
+
"bind": remote_flyte_file_or_dir_path,
|
|
135
|
+
"mode": "rw",
|
|
136
|
+
}
|
|
137
|
+
else:
|
|
138
|
+
command = command.replace(f"{{{{.inputs.{k}}}}}", str(input_val))
|
|
135
139
|
else:
|
|
136
140
|
command = cmd
|
|
137
141
|
|
|
@@ -193,7 +197,9 @@ class ContainerTask(TaskTemplate):
|
|
|
193
197
|
microseconds=microseconds,
|
|
194
198
|
)
|
|
195
199
|
|
|
196
|
-
def _convert_output_val_to_correct_type(
|
|
200
|
+
async def _convert_output_val_to_correct_type(
|
|
201
|
+
self, output_path: pathlib.Path, output_val: Any, output_type: Type
|
|
202
|
+
) -> Any:
|
|
197
203
|
import datetime
|
|
198
204
|
|
|
199
205
|
if issubclass(output_type, bool):
|
|
@@ -202,20 +208,31 @@ class ContainerTask(TaskTemplate):
|
|
|
202
208
|
return datetime.datetime.fromisoformat(output_val)
|
|
203
209
|
elif issubclass(output_type, datetime.timedelta):
|
|
204
210
|
return self._string_to_timedelta(output_val)
|
|
211
|
+
elif issubclass(output_type, File):
|
|
212
|
+
return await File.from_local(output_path)
|
|
213
|
+
elif issubclass(output_type, Dir):
|
|
214
|
+
return await Dir.from_local(output_path)
|
|
205
215
|
else:
|
|
206
216
|
return output_type(output_val)
|
|
207
217
|
|
|
208
|
-
def
|
|
209
|
-
|
|
218
|
+
async def _get_output(self, output_directory: pathlib.Path) -> Tuple[Any]:
|
|
219
|
+
output_items = []
|
|
210
220
|
if self._outputs:
|
|
211
221
|
for k, output_type in self._outputs.items():
|
|
212
222
|
output_path = output_directory / k
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
223
|
+
if os.path.isfile(output_path):
|
|
224
|
+
with output_path.open("r") as f:
|
|
225
|
+
output_val = f.read()
|
|
226
|
+
else:
|
|
227
|
+
output_val = None
|
|
228
|
+
parsed = await self._convert_output_val_to_correct_type(output_path, output_val, output_type)
|
|
229
|
+
output_items.append(parsed)
|
|
230
|
+
# return a tuple so that each element is treated as a separate output.
|
|
231
|
+
# this allows flyte to map the user-defined output types (dict) to individual values.
|
|
232
|
+
# if we returned a list instead, it would be treated as a single output.
|
|
233
|
+
return tuple(output_items)
|
|
234
|
+
|
|
235
|
+
async def execute(self, **kwargs) -> Any:
|
|
219
236
|
try:
|
|
220
237
|
import docker
|
|
221
238
|
except ImportError:
|
|
@@ -235,6 +252,7 @@ class ContainerTask(TaskTemplate):
|
|
|
235
252
|
raise AssertionError(f"Only Image objects are supported, not strings. Got {self._image} instead.")
|
|
236
253
|
uri = self._image.uri
|
|
237
254
|
self._pull_image_if_not_exists(client, uri)
|
|
255
|
+
print(f"Command: {commands!r}")
|
|
238
256
|
|
|
239
257
|
container = client.containers.run(uri, command=commands, remove=True, volumes=volume_bindings, detach=True)
|
|
240
258
|
|
|
@@ -247,8 +265,8 @@ class ContainerTask(TaskTemplate):
|
|
|
247
265
|
|
|
248
266
|
container.wait()
|
|
249
267
|
|
|
250
|
-
|
|
251
|
-
return
|
|
268
|
+
output = await self._get_output(output_directory)
|
|
269
|
+
return output
|
|
252
270
|
|
|
253
271
|
def data_loading_config(self, sctx: SerializationContext) -> tasks_pb2.DataLoadingConfig:
|
|
254
272
|
literal_to_protobuf = {
|
|
@@ -258,16 +276,11 @@ class ContainerTask(TaskTemplate):
|
|
|
258
276
|
}
|
|
259
277
|
|
|
260
278
|
return tasks_pb2.DataLoadingConfig(
|
|
261
|
-
input_path=self._input_data_dir,
|
|
262
|
-
output_path=self._output_data_dir,
|
|
279
|
+
input_path=str(self._input_data_dir) if self._input_data_dir else None,
|
|
280
|
+
output_path=str(self._output_data_dir) if self._output_data_dir else None,
|
|
263
281
|
enabled=True,
|
|
264
282
|
format=literal_to_protobuf.get(self._metadata_format, "JSON"),
|
|
265
283
|
)
|
|
266
284
|
|
|
267
285
|
def container_args(self, sctx: SerializationContext) -> List[str]:
|
|
268
286
|
return self._cmd + (self._args if self._args else [])
|
|
269
|
-
|
|
270
|
-
def config(self, sctx: SerializationContext) -> Dict[str, str]:
|
|
271
|
-
if self.pod_template is None:
|
|
272
|
-
return {}
|
|
273
|
-
return {_PRIMARY_CONTAINER_NAME_FIELD: self.primary_container_name}
|
flyte/git/__init__.py
ADDED
flyte/git/_config.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
import pathlib
|
|
2
|
+
import subprocess
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, Protocol
|
|
6
|
+
|
|
7
|
+
import flyte.config
|
|
8
|
+
from flyte._logging import logger
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class GitUrlBuilder(Protocol):
|
|
12
|
+
@staticmethod
|
|
13
|
+
def build_url(remote_url: str, file_path: str, commit_sha: str, line_number: int, is_tree_clean: bool) -> str: ...
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class GithubUrlBuilder(GitUrlBuilder):
|
|
17
|
+
host_name = "github.com"
|
|
18
|
+
|
|
19
|
+
@staticmethod
|
|
20
|
+
def build_url(remote_url: str, file_path: str, commit_sha: str, line_number: int, is_tree_clean: bool) -> str:
|
|
21
|
+
url = f"{remote_url}/blob/{commit_sha}/{file_path}"
|
|
22
|
+
if is_tree_clean:
|
|
23
|
+
url += f"#L{line_number}"
|
|
24
|
+
return url
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class GitlabUrlBuilder(GitUrlBuilder):
|
|
28
|
+
host_name = "gitlab.com"
|
|
29
|
+
|
|
30
|
+
@staticmethod
|
|
31
|
+
def build_url(remote_url: str, file_path: str, commit_sha: str, line_number: int, is_tree_clean: bool) -> str:
|
|
32
|
+
url = f"{remote_url}/-/blob/{commit_sha}/{file_path}"
|
|
33
|
+
if is_tree_clean:
|
|
34
|
+
url += f"#L{line_number}"
|
|
35
|
+
return url
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
GIT_URL_BUILDER_REGISTRY: Dict[str, GitUrlBuilder] = {
|
|
39
|
+
GithubUrlBuilder.host_name: GithubUrlBuilder,
|
|
40
|
+
GitlabUrlBuilder.host_name: GitlabUrlBuilder,
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(init=True, frozen=True)
|
|
45
|
+
class GitStatus:
|
|
46
|
+
"""
|
|
47
|
+
A class representing the status of a git repository.
|
|
48
|
+
|
|
49
|
+
:param is_valid: Whether git repository is valid
|
|
50
|
+
:param is_tree_clean: Whether working tree is clean
|
|
51
|
+
:param remote_url: Remote URL in HTTPS format
|
|
52
|
+
:param repo_dir: Repository root directory
|
|
53
|
+
:param commit_sha: Current commit SHA
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
is_valid: bool = False
|
|
57
|
+
is_tree_clean: bool = False
|
|
58
|
+
remote_url: str = ""
|
|
59
|
+
repo_dir: Path = Path()
|
|
60
|
+
commit_sha: str = ""
|
|
61
|
+
|
|
62
|
+
@classmethod
|
|
63
|
+
def from_current_repo(cls) -> "GitStatus":
|
|
64
|
+
"""Discover git information from the current repository.
|
|
65
|
+
|
|
66
|
+
If Git is not installed or .git does not exist, returns GitStatus with is_valid=False.
|
|
67
|
+
|
|
68
|
+
:return: GitStatus instance with discovered git information
|
|
69
|
+
"""
|
|
70
|
+
try:
|
|
71
|
+
# Check if we're in a git repository and get the root directory
|
|
72
|
+
result = subprocess.run(
|
|
73
|
+
["git", "rev-parse", "--show-toplevel"],
|
|
74
|
+
check=False,
|
|
75
|
+
capture_output=True,
|
|
76
|
+
text=True,
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
if result.returncode != 0:
|
|
80
|
+
logger.warning("Not in a git repository or git is not installed")
|
|
81
|
+
return cls()
|
|
82
|
+
|
|
83
|
+
repo_dir = Path(result.stdout.strip())
|
|
84
|
+
|
|
85
|
+
# Get current commit SHA
|
|
86
|
+
result = subprocess.run(
|
|
87
|
+
["git", "rev-parse", "HEAD"],
|
|
88
|
+
check=False,
|
|
89
|
+
capture_output=True,
|
|
90
|
+
text=True,
|
|
91
|
+
)
|
|
92
|
+
if result.returncode == 0:
|
|
93
|
+
commit_sha = result.stdout.strip()
|
|
94
|
+
else:
|
|
95
|
+
logger.warning("Failed to get current commit SHA")
|
|
96
|
+
return cls(repo_dir=repo_dir)
|
|
97
|
+
|
|
98
|
+
# Check if working tree is clean
|
|
99
|
+
result = subprocess.run(
|
|
100
|
+
["git", "status", "--porcelain"],
|
|
101
|
+
check=False,
|
|
102
|
+
capture_output=True,
|
|
103
|
+
text=True,
|
|
104
|
+
)
|
|
105
|
+
if result.returncode == 0:
|
|
106
|
+
is_tree_clean = len(result.stdout.strip()) == 0
|
|
107
|
+
else:
|
|
108
|
+
logger.warning("Failed to check if working tree is clean")
|
|
109
|
+
return cls(repo_dir=repo_dir, commit_sha=commit_sha)
|
|
110
|
+
|
|
111
|
+
# Get remote URL
|
|
112
|
+
instance = cls(repo_dir=repo_dir, commit_sha=commit_sha, is_tree_clean=is_tree_clean)
|
|
113
|
+
remote_url = instance._get_remote_url()
|
|
114
|
+
if not remote_url:
|
|
115
|
+
logger.warning("Failed to get remote URL")
|
|
116
|
+
return cls(repo_dir=repo_dir, commit_sha=commit_sha, is_tree_clean=is_tree_clean)
|
|
117
|
+
|
|
118
|
+
return cls(
|
|
119
|
+
is_valid=True,
|
|
120
|
+
is_tree_clean=is_tree_clean,
|
|
121
|
+
remote_url=remote_url,
|
|
122
|
+
repo_dir=repo_dir,
|
|
123
|
+
commit_sha=commit_sha,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
except Exception as e:
|
|
127
|
+
logger.debug(f"Failed to discover git repository: {e}")
|
|
128
|
+
return cls()
|
|
129
|
+
|
|
130
|
+
def _get_remote_url(self) -> str:
|
|
131
|
+
"""Get the remote push URL.
|
|
132
|
+
|
|
133
|
+
Returns the 'origin' remote push URL if it exists, otherwise returns
|
|
134
|
+
the first remote alphabetically. Converts SSH/Git protocol URLs to HTTPS format.
|
|
135
|
+
|
|
136
|
+
:return: The remote push URL in HTTPS format, or empty string if not found
|
|
137
|
+
"""
|
|
138
|
+
try:
|
|
139
|
+
# Try to get origin push remote first
|
|
140
|
+
result = subprocess.run(
|
|
141
|
+
["git", "remote", "get-url", "--push", "origin"],
|
|
142
|
+
check=False,
|
|
143
|
+
capture_output=True,
|
|
144
|
+
text=True,
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
if result.returncode == 0:
|
|
148
|
+
url = result.stdout.strip()
|
|
149
|
+
return self._normalize_url_to_https(url)
|
|
150
|
+
|
|
151
|
+
# If origin doesn't exist, get all remotes
|
|
152
|
+
result = subprocess.run(
|
|
153
|
+
["git", "remote"],
|
|
154
|
+
check=False,
|
|
155
|
+
capture_output=True,
|
|
156
|
+
text=True,
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
if result.returncode == 0:
|
|
160
|
+
remotes = result.stdout.strip().split("\n")
|
|
161
|
+
if remotes:
|
|
162
|
+
# Sort alphabetically and get the first one
|
|
163
|
+
remotes.sort()
|
|
164
|
+
first_remote = remotes[0]
|
|
165
|
+
|
|
166
|
+
# Get push URL for this remote
|
|
167
|
+
result = subprocess.run(
|
|
168
|
+
["git", "remote", "get-url", "--push", first_remote],
|
|
169
|
+
check=False,
|
|
170
|
+
capture_output=True,
|
|
171
|
+
text=True,
|
|
172
|
+
)
|
|
173
|
+
if result.returncode == 0:
|
|
174
|
+
url = result.stdout.strip()
|
|
175
|
+
return self._normalize_url_to_https(url)
|
|
176
|
+
|
|
177
|
+
return ""
|
|
178
|
+
|
|
179
|
+
except Exception:
|
|
180
|
+
return ""
|
|
181
|
+
|
|
182
|
+
def _normalize_url_to_https(self, url: str) -> str:
|
|
183
|
+
"""Convert SSH or Git protocol URLs to HTTPS format.
|
|
184
|
+
|
|
185
|
+
Examples:
|
|
186
|
+
git@github.com:user/repo.git -> https://github.com/user/repo
|
|
187
|
+
https://github.com/user/repo.git -> https://github.com/user/repo
|
|
188
|
+
|
|
189
|
+
:param url: The Git URL to normalize
|
|
190
|
+
:return: The normalized HTTPS URL
|
|
191
|
+
"""
|
|
192
|
+
# Remove .git suffix first
|
|
193
|
+
url = url.removesuffix(".git")
|
|
194
|
+
|
|
195
|
+
# Handle SSH format: git@host:path or user@host:path
|
|
196
|
+
if url.startswith("git@"):
|
|
197
|
+
parts = url.split("@", 1)
|
|
198
|
+
if len(parts) == 2:
|
|
199
|
+
host_and_path = parts[1].replace(":", "/", 1)
|
|
200
|
+
return f"https://{host_and_path}"
|
|
201
|
+
|
|
202
|
+
return url
|
|
203
|
+
|
|
204
|
+
def _get_remote_host(self, url: str) -> str:
|
|
205
|
+
"""Get the remote host name from a normalized HTTPS URL.
|
|
206
|
+
|
|
207
|
+
:param url: URL that has been normalized to HTTPS format by _normalize_url_to_https
|
|
208
|
+
:return: The host name (e.g., "github.com", "gitlab.com")
|
|
209
|
+
"""
|
|
210
|
+
parts = url.split("//", 1)
|
|
211
|
+
if len(parts) < 2:
|
|
212
|
+
return ""
|
|
213
|
+
|
|
214
|
+
# Get everything after "//" and split by "/"
|
|
215
|
+
host_and_path = parts[1]
|
|
216
|
+
parts = host_and_path.split("/", 1)
|
|
217
|
+
if len(parts) < 2:
|
|
218
|
+
return ""
|
|
219
|
+
host = host_and_path.split("/")[0]
|
|
220
|
+
|
|
221
|
+
return host
|
|
222
|
+
|
|
223
|
+
def _get_file_path(self, path: Path | str) -> str:
|
|
224
|
+
"""Get the path relative to the repository root directory.
|
|
225
|
+
|
|
226
|
+
:param path: Absolute or relative path to a file
|
|
227
|
+
:return: Path relative to repo_dir as string, or empty string if failed
|
|
228
|
+
"""
|
|
229
|
+
try:
|
|
230
|
+
path_obj = Path(path).resolve()
|
|
231
|
+
relative_path = path_obj.relative_to(self.repo_dir)
|
|
232
|
+
return str(relative_path)
|
|
233
|
+
except Exception as e:
|
|
234
|
+
logger.warning(f"Failed to get relative path for {path}: {e}")
|
|
235
|
+
return ""
|
|
236
|
+
|
|
237
|
+
def build_url(self, path: Path | str, line_number: int) -> str:
|
|
238
|
+
"""Build a git URL for the given path.
|
|
239
|
+
|
|
240
|
+
:param path: Path to a file
|
|
241
|
+
:param line_number: Line number of the code file
|
|
242
|
+
:return: Path relative to repo_dir
|
|
243
|
+
"""
|
|
244
|
+
if not self.is_valid:
|
|
245
|
+
logger.warning("GitConfig is not valid, cannot build URL")
|
|
246
|
+
return ""
|
|
247
|
+
host_name = self._get_remote_host(self.remote_url)
|
|
248
|
+
git_file_path = self._get_file_path(path)
|
|
249
|
+
if not host_name:
|
|
250
|
+
logger.warning(f"Failed to extract host name from remote URL: {self.remote_url}")
|
|
251
|
+
return ""
|
|
252
|
+
if not git_file_path:
|
|
253
|
+
return ""
|
|
254
|
+
builder = GIT_URL_BUILDER_REGISTRY.get(host_name)
|
|
255
|
+
if not builder:
|
|
256
|
+
logger.warning(f"URL builder for {host_name} is not implemented")
|
|
257
|
+
return ""
|
|
258
|
+
url = builder.build_url(self.remote_url, git_file_path, self.commit_sha, line_number, self.is_tree_clean)
|
|
259
|
+
return url
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def config_from_root(path: pathlib.Path | str = ".flyte/config.yaml") -> flyte.config.Config | None:
|
|
263
|
+
"""Get the config file from the git root directory.
|
|
264
|
+
|
|
265
|
+
By default, the config file is expected to be in `.flyte/config.yaml` in the git root directory.
|
|
266
|
+
|
|
267
|
+
:param path: Path to the config file relative to git root directory (default: ".flyte/config.yaml")
|
|
268
|
+
:return: Config object if found, None otherwise
|
|
269
|
+
"""
|
|
270
|
+
try:
|
|
271
|
+
result = subprocess.run(["git", "rev-parse", "--show-toplevel"], check=False, capture_output=True, text=True)
|
|
272
|
+
if result.returncode != 0:
|
|
273
|
+
return None
|
|
274
|
+
root = pathlib.Path(result.stdout.strip())
|
|
275
|
+
if not (root / path).exists():
|
|
276
|
+
return None
|
|
277
|
+
return flyte.config.auto(root / path)
|
|
278
|
+
except Exception:
|
|
279
|
+
return None
|
flyte/io/__init__.py
CHANGED
|
@@ -3,9 +3,16 @@
|
|
|
3
3
|
|
|
4
4
|
This package contains additional data types beyond the primitive data types in python to abstract data flow
|
|
5
5
|
of large datasets in Union.
|
|
6
|
+
|
|
6
7
|
"""
|
|
7
8
|
|
|
8
|
-
__all__ = [
|
|
9
|
+
__all__ = [
|
|
10
|
+
"PARQUET",
|
|
11
|
+
"DataFrame",
|
|
12
|
+
"Dir",
|
|
13
|
+
"File",
|
|
14
|
+
]
|
|
9
15
|
|
|
16
|
+
from ._dataframe import PARQUET, DataFrame
|
|
10
17
|
from ._dir import Dir
|
|
11
18
|
from ._file import File
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Flytekit
|
|
2
|
+
Flytekit DataFrame
|
|
3
3
|
==========================================================
|
|
4
|
-
.. currentmodule::
|
|
4
|
+
.. currentmodule:: flyte.io._dataframe
|
|
5
5
|
|
|
6
6
|
.. autosummary::
|
|
7
7
|
:template: custom.rst
|
|
8
8
|
:toctree: generated/
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
DataFrame
|
|
11
|
+
DataFrameDecoder
|
|
12
|
+
DataFrameEncoder
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
import functools
|
|
@@ -17,12 +17,13 @@ import functools
|
|
|
17
17
|
from flyte._logging import logger
|
|
18
18
|
from flyte._utils.lazy_module import is_imported
|
|
19
19
|
|
|
20
|
-
from .
|
|
20
|
+
from .dataframe import (
|
|
21
|
+
PARQUET,
|
|
22
|
+
DataFrame,
|
|
23
|
+
DataFrameDecoder,
|
|
24
|
+
DataFrameEncoder,
|
|
25
|
+
DataFrameTransformerEngine,
|
|
21
26
|
DuplicateHandlerError,
|
|
22
|
-
StructuredDataset,
|
|
23
|
-
StructuredDatasetDecoder,
|
|
24
|
-
StructuredDatasetEncoder,
|
|
25
|
-
StructuredDatasetTransformerEngine,
|
|
26
27
|
)
|
|
27
28
|
|
|
28
29
|
|
|
@@ -30,8 +31,8 @@ from .structured_dataset import (
|
|
|
30
31
|
def register_csv_handlers():
|
|
31
32
|
from .basic_dfs import CSVToPandasDecodingHandler, PandasToCSVEncodingHandler
|
|
32
33
|
|
|
33
|
-
|
|
34
|
-
|
|
34
|
+
DataFrameTransformerEngine.register(PandasToCSVEncodingHandler(), default_format_for_type=True)
|
|
35
|
+
DataFrameTransformerEngine.register(CSVToPandasDecodingHandler(), default_format_for_type=True)
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
@functools.lru_cache(maxsize=None)
|
|
@@ -42,9 +43,9 @@ def register_pandas_handlers():
|
|
|
42
43
|
|
|
43
44
|
from .basic_dfs import PandasToParquetEncodingHandler, ParquetToPandasDecodingHandler
|
|
44
45
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
46
|
+
DataFrameTransformerEngine.register(PandasToParquetEncodingHandler(), default_format_for_type=True)
|
|
47
|
+
DataFrameTransformerEngine.register(ParquetToPandasDecodingHandler(), default_format_for_type=True)
|
|
48
|
+
DataFrameTransformerEngine.register_renderer(pd.DataFrame, TopFrameRenderer())
|
|
48
49
|
|
|
49
50
|
|
|
50
51
|
@functools.lru_cache(maxsize=None)
|
|
@@ -55,9 +56,9 @@ def register_arrow_handlers():
|
|
|
55
56
|
|
|
56
57
|
from .basic_dfs import ArrowToParquetEncodingHandler, ParquetToArrowDecodingHandler
|
|
57
58
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
DataFrameTransformerEngine.register(ArrowToParquetEncodingHandler(), default_format_for_type=True)
|
|
60
|
+
DataFrameTransformerEngine.register(ParquetToArrowDecodingHandler(), default_format_for_type=True)
|
|
61
|
+
DataFrameTransformerEngine.register_renderer(pa.Table, ArrowRenderer())
|
|
61
62
|
|
|
62
63
|
|
|
63
64
|
@functools.lru_cache(maxsize=None)
|
|
@@ -70,10 +71,10 @@ def register_bigquery_handlers():
|
|
|
70
71
|
PandasToBQEncodingHandlers,
|
|
71
72
|
)
|
|
72
73
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
DataFrameTransformerEngine.register(PandasToBQEncodingHandlers())
|
|
75
|
+
DataFrameTransformerEngine.register(BQToPandasDecodingHandler())
|
|
76
|
+
DataFrameTransformerEngine.register(ArrowToBQEncodingHandlers())
|
|
77
|
+
DataFrameTransformerEngine.register(BQToArrowDecodingHandler())
|
|
77
78
|
except ImportError:
|
|
78
79
|
logger.info(
|
|
79
80
|
"We won't register bigquery handler for structured dataset because "
|
|
@@ -86,8 +87,8 @@ def register_snowflake_handlers():
|
|
|
86
87
|
try:
|
|
87
88
|
from .snowflake import PandasToSnowflakeEncodingHandlers, SnowflakeToPandasDecodingHandler
|
|
88
89
|
|
|
89
|
-
|
|
90
|
-
|
|
90
|
+
DataFrameTransformerEngine.register(SnowflakeToPandasDecodingHandler())
|
|
91
|
+
DataFrameTransformerEngine.register(PandasToSnowflakeEncodingHandlers())
|
|
91
92
|
|
|
92
93
|
except ImportError:
|
|
93
94
|
logger.info(
|
|
@@ -96,7 +97,7 @@ def register_snowflake_handlers():
|
|
|
96
97
|
)
|
|
97
98
|
|
|
98
99
|
|
|
99
|
-
def
|
|
100
|
+
def lazy_import_dataframe_handler():
|
|
100
101
|
if is_imported("pandas"):
|
|
101
102
|
try:
|
|
102
103
|
register_pandas_handlers()
|
|
@@ -121,9 +122,10 @@ def lazy_import_structured_dataset_handler():
|
|
|
121
122
|
|
|
122
123
|
|
|
123
124
|
__all__ = [
|
|
124
|
-
"
|
|
125
|
-
"
|
|
126
|
-
"
|
|
127
|
-
"
|
|
128
|
-
"
|
|
125
|
+
"PARQUET",
|
|
126
|
+
"DataFrame",
|
|
127
|
+
"DataFrameDecoder",
|
|
128
|
+
"DataFrameEncoder",
|
|
129
|
+
"DataFrameTransformerEngine",
|
|
130
|
+
"lazy_import_dataframe_handler",
|
|
129
131
|
]
|