flyte 0.2.0b1__py3-none-any.whl → 2.0.0b46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flyte/__init__.py +83 -30
- flyte/_bin/connect.py +61 -0
- flyte/_bin/debug.py +38 -0
- flyte/_bin/runtime.py +87 -19
- flyte/_bin/serve.py +351 -0
- flyte/_build.py +3 -2
- flyte/_cache/cache.py +6 -5
- flyte/_cache/local_cache.py +216 -0
- flyte/_code_bundle/_ignore.py +31 -5
- flyte/_code_bundle/_packaging.py +42 -11
- flyte/_code_bundle/_utils.py +57 -34
- flyte/_code_bundle/bundle.py +130 -27
- flyte/_constants.py +1 -0
- flyte/_context.py +21 -5
- flyte/_custom_context.py +73 -0
- flyte/_debug/constants.py +37 -0
- flyte/_debug/utils.py +17 -0
- flyte/_debug/vscode.py +315 -0
- flyte/_deploy.py +396 -75
- flyte/_deployer.py +109 -0
- flyte/_environment.py +94 -11
- flyte/_excepthook.py +37 -0
- flyte/_group.py +2 -1
- flyte/_hash.py +1 -16
- flyte/_image.py +544 -231
- flyte/_initialize.py +456 -316
- flyte/_interface.py +40 -5
- flyte/_internal/controllers/__init__.py +22 -8
- flyte/_internal/controllers/_local_controller.py +159 -35
- flyte/_internal/controllers/_trace.py +18 -10
- flyte/_internal/controllers/remote/__init__.py +38 -9
- flyte/_internal/controllers/remote/_action.py +82 -12
- flyte/_internal/controllers/remote/_client.py +6 -2
- flyte/_internal/controllers/remote/_controller.py +290 -64
- flyte/_internal/controllers/remote/_core.py +155 -95
- flyte/_internal/controllers/remote/_informer.py +40 -20
- flyte/_internal/controllers/remote/_service_protocol.py +2 -2
- flyte/_internal/imagebuild/__init__.py +2 -10
- flyte/_internal/imagebuild/docker_builder.py +391 -84
- flyte/_internal/imagebuild/image_builder.py +111 -55
- flyte/_internal/imagebuild/remote_builder.py +409 -0
- flyte/_internal/imagebuild/utils.py +79 -0
- flyte/_internal/resolvers/_app_env_module.py +92 -0
- flyte/_internal/resolvers/_task_module.py +5 -38
- flyte/_internal/resolvers/app_env.py +26 -0
- flyte/_internal/resolvers/common.py +8 -1
- flyte/_internal/resolvers/default.py +2 -2
- flyte/_internal/runtime/convert.py +319 -36
- flyte/_internal/runtime/entrypoints.py +106 -18
- flyte/_internal/runtime/io.py +71 -23
- flyte/_internal/runtime/resources_serde.py +21 -7
- flyte/_internal/runtime/reuse.py +125 -0
- flyte/_internal/runtime/rusty.py +196 -0
- flyte/_internal/runtime/task_serde.py +239 -66
- flyte/_internal/runtime/taskrunner.py +48 -8
- flyte/_internal/runtime/trigger_serde.py +162 -0
- flyte/_internal/runtime/types_serde.py +7 -16
- flyte/_keyring/file.py +115 -0
- flyte/_link.py +30 -0
- flyte/_logging.py +241 -42
- flyte/_map.py +312 -0
- flyte/_metrics.py +59 -0
- flyte/_module.py +74 -0
- flyte/_pod.py +30 -0
- flyte/_resources.py +296 -33
- flyte/_retry.py +1 -7
- flyte/_reusable_environment.py +72 -7
- flyte/_run.py +462 -132
- flyte/_secret.py +47 -11
- flyte/_serve.py +333 -0
- flyte/_task.py +245 -56
- flyte/_task_environment.py +219 -97
- flyte/_task_plugins.py +47 -0
- flyte/_tools.py +8 -8
- flyte/_trace.py +15 -24
- flyte/_trigger.py +1027 -0
- flyte/_utils/__init__.py +12 -1
- flyte/_utils/asyn.py +3 -1
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +5 -4
- flyte/_utils/description_parser.py +19 -0
- flyte/_utils/docker_credentials.py +173 -0
- flyte/_utils/helpers.py +45 -19
- flyte/_utils/module_loader.py +123 -0
- flyte/_utils/org_discovery.py +57 -0
- flyte/_utils/uv_script_parser.py +8 -1
- flyte/_version.py +16 -3
- flyte/app/__init__.py +27 -0
- flyte/app/_app_environment.py +362 -0
- flyte/app/_connector_environment.py +40 -0
- flyte/app/_deploy.py +130 -0
- flyte/app/_parameter.py +343 -0
- flyte/app/_runtime/__init__.py +3 -0
- flyte/app/_runtime/app_serde.py +383 -0
- flyte/app/_types.py +113 -0
- flyte/app/extras/__init__.py +9 -0
- flyte/app/extras/_auth_middleware.py +217 -0
- flyte/app/extras/_fastapi.py +93 -0
- flyte/app/extras/_model_loader/__init__.py +3 -0
- flyte/app/extras/_model_loader/config.py +7 -0
- flyte/app/extras/_model_loader/loader.py +288 -0
- flyte/cli/__init__.py +12 -0
- flyte/cli/_abort.py +28 -0
- flyte/cli/_build.py +114 -0
- flyte/cli/_common.py +493 -0
- flyte/cli/_create.py +371 -0
- flyte/cli/_delete.py +45 -0
- flyte/cli/_deploy.py +401 -0
- flyte/cli/_gen.py +316 -0
- flyte/cli/_get.py +446 -0
- flyte/cli/_option.py +33 -0
- flyte/{_cli → cli}/_params.py +57 -17
- flyte/cli/_plugins.py +209 -0
- flyte/cli/_prefetch.py +292 -0
- flyte/cli/_run.py +690 -0
- flyte/cli/_serve.py +338 -0
- flyte/cli/_update.py +86 -0
- flyte/cli/_user.py +20 -0
- flyte/cli/main.py +246 -0
- flyte/config/__init__.py +2 -167
- flyte/config/_config.py +215 -163
- flyte/config/_internal.py +10 -1
- flyte/config/_reader.py +225 -0
- flyte/connectors/__init__.py +11 -0
- flyte/connectors/_connector.py +330 -0
- flyte/connectors/_server.py +194 -0
- flyte/connectors/utils.py +159 -0
- flyte/errors.py +134 -2
- flyte/extend.py +24 -0
- flyte/extras/_container.py +69 -56
- flyte/git/__init__.py +3 -0
- flyte/git/_config.py +279 -0
- flyte/io/__init__.py +8 -1
- flyte/io/{structured_dataset → _dataframe}/__init__.py +32 -30
- flyte/io/{structured_dataset → _dataframe}/basic_dfs.py +75 -68
- flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py} +207 -242
- flyte/io/_dir.py +575 -113
- flyte/io/_file.py +587 -141
- flyte/io/_hashing_io.py +342 -0
- flyte/io/extend.py +7 -0
- flyte/models.py +635 -0
- flyte/prefetch/__init__.py +22 -0
- flyte/prefetch/_hf_model.py +563 -0
- flyte/remote/__init__.py +14 -3
- flyte/remote/_action.py +879 -0
- flyte/remote/_app.py +346 -0
- flyte/remote/_auth_metadata.py +42 -0
- flyte/remote/_client/_protocols.py +62 -4
- flyte/remote/_client/auth/_auth_utils.py +19 -0
- flyte/remote/_client/auth/_authenticators/base.py +8 -2
- flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
- flyte/remote/_client/auth/_authenticators/factory.py +4 -0
- flyte/remote/_client/auth/_authenticators/passthrough.py +79 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +17 -18
- flyte/remote/_client/auth/_channel.py +47 -18
- flyte/remote/_client/auth/_client_config.py +5 -3
- flyte/remote/_client/auth/_keyring.py +15 -2
- flyte/remote/_client/auth/_token_client.py +3 -3
- flyte/remote/_client/controlplane.py +206 -18
- flyte/remote/_common.py +66 -0
- flyte/remote/_data.py +107 -22
- flyte/remote/_logs.py +116 -33
- flyte/remote/_project.py +21 -19
- flyte/remote/_run.py +164 -631
- flyte/remote/_secret.py +72 -29
- flyte/remote/_task.py +387 -46
- flyte/remote/_trigger.py +368 -0
- flyte/remote/_user.py +43 -0
- flyte/report/_report.py +10 -6
- flyte/storage/__init__.py +13 -1
- flyte/storage/_config.py +237 -0
- flyte/storage/_parallel_reader.py +289 -0
- flyte/storage/_storage.py +268 -59
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +414 -0
- flyte/types/__init__.py +39 -0
- flyte/types/_interface.py +22 -7
- flyte/{io/pickle/transformer.py → types/_pickle.py} +37 -9
- flyte/types/_string_literals.py +8 -9
- flyte/types/_type_engine.py +226 -126
- flyte/types/_utils.py +1 -1
- flyte-2.0.0b46.data/scripts/debug.py +38 -0
- flyte-2.0.0b46.data/scripts/runtime.py +194 -0
- flyte-2.0.0b46.dist-info/METADATA +352 -0
- flyte-2.0.0b46.dist-info/RECORD +221 -0
- flyte-2.0.0b46.dist-info/entry_points.txt +8 -0
- flyte-2.0.0b46.dist-info/licenses/LICENSE +201 -0
- flyte/_api_commons.py +0 -3
- flyte/_cli/_common.py +0 -299
- flyte/_cli/_create.py +0 -42
- flyte/_cli/_delete.py +0 -23
- flyte/_cli/_deploy.py +0 -140
- flyte/_cli/_get.py +0 -235
- flyte/_cli/_run.py +0 -174
- flyte/_cli/main.py +0 -98
- flyte/_datastructures.py +0 -342
- flyte/_internal/controllers/pbhash.py +0 -39
- flyte/_protos/common/authorization_pb2.py +0 -66
- flyte/_protos/common/authorization_pb2.pyi +0 -108
- flyte/_protos/common/authorization_pb2_grpc.py +0 -4
- flyte/_protos/common/identifier_pb2.py +0 -71
- flyte/_protos/common/identifier_pb2.pyi +0 -82
- flyte/_protos/common/identifier_pb2_grpc.py +0 -4
- flyte/_protos/common/identity_pb2.py +0 -48
- flyte/_protos/common/identity_pb2.pyi +0 -72
- flyte/_protos/common/identity_pb2_grpc.py +0 -4
- flyte/_protos/common/list_pb2.py +0 -36
- flyte/_protos/common/list_pb2.pyi +0 -69
- flyte/_protos/common/list_pb2_grpc.py +0 -4
- flyte/_protos/common/policy_pb2.py +0 -37
- flyte/_protos/common/policy_pb2.pyi +0 -27
- flyte/_protos/common/policy_pb2_grpc.py +0 -4
- flyte/_protos/common/role_pb2.py +0 -37
- flyte/_protos/common/role_pb2.pyi +0 -53
- flyte/_protos/common/role_pb2_grpc.py +0 -4
- flyte/_protos/common/runtime_version_pb2.py +0 -28
- flyte/_protos/common/runtime_version_pb2.pyi +0 -24
- flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
- flyte/_protos/logs/dataplane/payload_pb2.py +0 -96
- flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -168
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
- flyte/_protos/secret/definition_pb2.py +0 -49
- flyte/_protos/secret/definition_pb2.pyi +0 -93
- flyte/_protos/secret/definition_pb2_grpc.py +0 -4
- flyte/_protos/secret/payload_pb2.py +0 -62
- flyte/_protos/secret/payload_pb2.pyi +0 -94
- flyte/_protos/secret/payload_pb2_grpc.py +0 -4
- flyte/_protos/secret/secret_pb2.py +0 -38
- flyte/_protos/secret/secret_pb2.pyi +0 -6
- flyte/_protos/secret/secret_pb2_grpc.py +0 -198
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
- flyte/_protos/validate/validate/validate_pb2.py +0 -76
- flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
- flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
- flyte/_protos/workflow/queue_service_pb2.py +0 -106
- flyte/_protos/workflow/queue_service_pb2.pyi +0 -141
- flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
- flyte/_protos/workflow/run_definition_pb2.py +0 -128
- flyte/_protos/workflow/run_definition_pb2.pyi +0 -310
- flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
- flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
- flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
- flyte/_protos/workflow/run_service_pb2.py +0 -133
- flyte/_protos/workflow/run_service_pb2.pyi +0 -175
- flyte/_protos/workflow/run_service_pb2_grpc.py +0 -412
- flyte/_protos/workflow/state_service_pb2.py +0 -58
- flyte/_protos/workflow/state_service_pb2.pyi +0 -71
- flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
- flyte/_protos/workflow/task_definition_pb2.py +0 -72
- flyte/_protos/workflow/task_definition_pb2.pyi +0 -65
- flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
- flyte/_protos/workflow/task_service_pb2.py +0 -44
- flyte/_protos/workflow/task_service_pb2.pyi +0 -31
- flyte/_protos/workflow/task_service_pb2_grpc.py +0 -104
- flyte/io/_dataframe.py +0 -0
- flyte/io/pickle/__init__.py +0 -0
- flyte/remote/_console.py +0 -18
- flyte-0.2.0b1.dist-info/METADATA +0 -179
- flyte-0.2.0b1.dist-info/RECORD +0 -204
- flyte-0.2.0b1.dist-info/entry_points.txt +0 -3
- /flyte/{_cli → _debug}/__init__.py +0 -0
- /flyte/{_protos → _keyring}/__init__.py +0 -0
- {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/WHEEL +0 -0
- {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
from http import HTTPStatus
|
|
5
|
+
from typing import Callable, Dict, List, Tuple, Type, Union
|
|
6
|
+
|
|
7
|
+
import grpc
|
|
8
|
+
from flyteidl2.connector.connector_pb2 import (
|
|
9
|
+
CreateTaskRequest,
|
|
10
|
+
CreateTaskResponse,
|
|
11
|
+
DeleteTaskRequest,
|
|
12
|
+
DeleteTaskResponse,
|
|
13
|
+
GetConnectorRequest,
|
|
14
|
+
GetConnectorResponse,
|
|
15
|
+
GetTaskLogsRequest,
|
|
16
|
+
GetTaskLogsResponse,
|
|
17
|
+
GetTaskMetricsRequest,
|
|
18
|
+
GetTaskMetricsResponse,
|
|
19
|
+
GetTaskRequest,
|
|
20
|
+
GetTaskResponse,
|
|
21
|
+
ListConnectorsRequest,
|
|
22
|
+
ListConnectorsResponse,
|
|
23
|
+
)
|
|
24
|
+
from flyteidl2.connector.service_pb2_grpc import AsyncConnectorServiceServicer, ConnectorMetadataServiceServicer
|
|
25
|
+
from flyteidl2.core.security_pb2 import Connection
|
|
26
|
+
from prometheus_client import Counter, Summary
|
|
27
|
+
|
|
28
|
+
from flyte._internal.runtime.convert import Inputs, convert_from_inputs_to_native
|
|
29
|
+
from flyte._logging import logger
|
|
30
|
+
from flyte.connectors._connector import ConnectorRegistry, FlyteConnectorNotFound, get_resource_proto
|
|
31
|
+
from flyte.connectors.utils import _start_grpc_server
|
|
32
|
+
from flyte.models import NativeInterface, _has_default
|
|
33
|
+
from flyte.syncify import syncify
|
|
34
|
+
from flyte.types import TypeEngine
|
|
35
|
+
|
|
36
|
+
# Common prefix for every Prometheus metric exported by the connector service.
metric_prefix = "flyte_connector_"

# Values used for the "operation" label on the metrics below.
create_operation = "create"
get_operation = "get"
delete_operation = "delete"

# Follow the naming convention. https://prometheus.io/docs/practices/naming/
# Successful requests, labelled by task type and operation.
request_success_count = Counter(
    name=f"{metric_prefix}requests_success_total",
    documentation="Total number of successful requests",
    labelnames=["task_type", "operation"],
)
# Failed requests, additionally labelled with an error code.
request_failure_count = Counter(
    name=f"{metric_prefix}requests_failure_total",
    documentation="Total number of failed requests",
    labelnames=["task_type", "operation", "error_code"],
)
# Time spent handling a request, labelled by task type and operation.
request_latency = Summary(
    name=f"{metric_prefix}request_latency_seconds",
    documentation="Time spent processing connector request",
    labelnames=["task_type", "operation"],
)
# Size of the input literal observed per task type.
input_literal_size = Summary(
    name=f"{metric_prefix}input_literal_bytes",
    documentation="Size of input literal",
    labelnames=["task_type"],
)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _handle_exception(e: Exception, context: grpc.ServicerContext, task_type: str, operation: str):
    """
    Report a failed connector request.

    The error is logged, written to the gRPC ``context`` (status code and details) and counted
    in ``request_failure_count``.

    :param e: The exception raised while handling the request.
    :param context: gRPC servicer context that receives the status code and details.
    :param task_type: Task type used in the message and as a metric label.
    :param operation: Operation name used in the message and as a metric label.
    """
    # Choose the gRPC status, the metric error code and the message for this kind of failure.
    if isinstance(e, FlyteConnectorNotFound):
        grpc_code, http_code = grpc.StatusCode.NOT_FOUND, HTTPStatus.NOT_FOUND
        error_message = f"Cannot find connector for task type: {task_type}."
    else:
        grpc_code, http_code = grpc.StatusCode.INTERNAL, HTTPStatus.INTERNAL_SERVER_ERROR
        error_message = f"failed to {operation} {task_type} task with error:\n {e}."

    # Reporting is the same for both kinds of failure.
    logger.error(error_message)
    context.set_code(grpc_code)
    context.set_details(error_message)
    request_failure_count.labels(task_type=task_type, operation=operation, error_code=http_code).inc()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class ConnectorService:
    """
    Entry point used to launch the connector gRPC service.
    """

    @syncify
    @classmethod
    async def run(cls, port: int, prometheus_port: int, worker: int, timeout: int | None, modules: List[str] | None):
        """
        Make the current working directory importable and start the gRPC server.

        All arguments are forwarded unchanged to ``_start_grpc_server``.
        """
        cwd = os.getcwd()
        # Append the working directory only if no sys.path entry already resolves to it.
        already_on_path = any(os.path.realpath(entry) == cwd for entry in sys.path)
        if not already_on_path:
            sys.path.append(cwd)
        await _start_grpc_server(port, prometheus_port, worker, timeout, modules)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def record_connector_metrics(func: Callable):
    """
    Decorate an async connector RPC handler so every call is timed and counted.

    The wrapper derives the ``task_type`` and ``operation`` metric labels from the request type
    (create / get / delete). For any other request type it sets ``UNIMPLEMENTED`` on the context
    and returns ``None`` without calling ``func``. Exceptions raised by ``func`` are passed to
    ``_handle_exception`` and the wrapper then returns ``None``.

    :param func: Async handler called as ``func(self, request, context, *args, **kwargs)``.
    :return: The wrapping coroutine function, carrying the metadata of ``func``.
    """
    # Local import so the decorator does not depend on a change to the file-level imports.
    import functools

    # Preserve __name__/__doc__/__wrapped__ of the decorated handler.
    @functools.wraps(func)
    async def wrapper(
        self,
        request: Union[CreateTaskRequest, GetTaskRequest, DeleteTaskRequest],
        context: grpc.ServicerContext,
        *args,
        **kwargs,
    ):
        if isinstance(request, CreateTaskRequest):
            task_type = request.template.type
            operation = create_operation
            if request.inputs:
                # Record how large the incoming literal inputs are for this task type.
                input_literal_size.labels(task_type=task_type).observe(request.inputs.ByteSize())
        elif isinstance(request, GetTaskRequest):
            task_type = request.task_category.name
            operation = get_operation
        elif isinstance(request, DeleteTaskRequest):
            task_type = request.task_category.name
            operation = delete_operation
        else:
            context.set_code(grpc.StatusCode.UNIMPLEMENTED)
            context.set_details("Method not implemented!")
            return None

        try:
            with request_latency.labels(task_type=task_type, operation=operation).time():
                res = await func(self, request, context, *args, **kwargs)
            request_success_count.labels(task_type=task_type, operation=operation).inc()
            return res
        except Exception as e:
            # The failure is reported through the gRPC context; nothing is re-raised.
            _handle_exception(e, context, task_type, operation)

    return wrapper
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _get_connection_kwargs(request: Connection) -> Dict[str, str]:
|
|
123
|
+
kwargs = {}
|
|
124
|
+
|
|
125
|
+
for k, v in request.secrets.items():
|
|
126
|
+
kwargs[k] = v
|
|
127
|
+
for k, v in request.configs.items():
|
|
128
|
+
kwargs[k] = v
|
|
129
|
+
|
|
130
|
+
return kwargs
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class AsyncConnectorService(AsyncConnectorServiceServicer):
    """
    gRPC servicer that forwards task RPCs to the connector registered for the requested task type.
    """

    @record_connector_metrics
    async def CreateTask(self, request: CreateTaskRequest, context: grpc.ServicerContext) -> CreateTaskResponse:
        """
        Convert the request inputs to native values, ask the connector to create the task and
        return the encoded resource metadata.
        """
        task_template = request.template
        connector = ConnectorRegistry.get_connector(task_template.type, task_template.task_type_version)
        logger.info(f"{connector.name} start creating the job")

        # Guess a Python type for every declared input; none of them gets a default value.
        input_types: Dict[str, Tuple[Type, Type[_has_default] | Type[inspect._empty]]] = {}
        for var_name, variable in task_template.interface.inputs.variables.items():
            input_types[var_name] = (TypeEngine.guess_python_type(variable.type), inspect.Parameter.empty)

        interface = NativeInterface.from_types(inputs=input_types, outputs={})
        native_inputs = await convert_from_inputs_to_native(interface, Inputs(proto_inputs=request.inputs))

        connection_kwargs = _get_connection_kwargs(request.connection)
        resource_meta = await connector.create(
            task_template=request.template,
            inputs=native_inputs,
            output_prefix=request.output_prefix,
            task_execution_metadata=request.task_execution_metadata,
            **connection_kwargs,
        )
        return CreateTaskResponse(resource_meta=resource_meta.encode())

    @record_connector_metrics
    async def GetTask(self, request: GetTaskRequest, context: grpc.ServicerContext) -> GetTaskResponse:
        """
        Fetch the current resource for a task from its connector and return it as a proto.
        """
        category = request.task_category
        connector = ConnectorRegistry.get_connector(category.name, category.version)
        logger.info(f"{connector.name} start checking the status of the job")

        decoded_meta = connector.metadata_type.decode(request.resource_meta)
        resource = await connector.get(resource_meta=decoded_meta, **_get_connection_kwargs(request.connection))
        return GetTaskResponse(resource=await get_resource_proto(resource))

    @record_connector_metrics
    async def DeleteTask(self, request: DeleteTaskRequest, context: grpc.ServicerContext) -> DeleteTaskResponse:
        """
        Ask the connector to delete the task identified by the request's resource metadata.
        """
        category = request.task_category
        connector = ConnectorRegistry.get_connector(category.name, category.version)
        logger.info(f"{connector.name} start deleting the job")

        decoded_meta = connector.metadata_type.decode(request.resource_meta)
        await connector.delete(resource_meta=decoded_meta, **_get_connection_kwargs(request.connection))
        return DeleteTaskResponse()

    async def GetTaskMetrics(
        self, request: GetTaskMetricsRequest, context: grpc.ServicerContext
    ) -> GetTaskMetricsResponse:
        """
        Return the connector's metrics for the task identified by the request's resource metadata.
        """
        category = request.task_category
        connector = ConnectorRegistry.get_connector(category.name, category.version)
        logger.info(f"{connector.name} start getting metrics of the job")

        decoded_meta = connector.metadata_type.decode(request.resource_meta)
        return await connector.get_metrics(resource_meta=decoded_meta)

    async def GetTaskLogs(self, request: GetTaskLogsRequest, context: grpc.ServicerContext) -> GetTaskLogsResponse:
        """
        Return the connector's logs for the task identified by the request's resource metadata.
        """
        category = request.task_category
        connector = ConnectorRegistry.get_connector(category.name, category.version)
        logger.info(f"{connector.name} start getting logs of the job")

        decoded_meta = connector.metadata_type.decode(request.resource_meta)
        return await connector.get_logs(resource_meta=decoded_meta)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class ConnectorMetadataService(ConnectorMetadataServiceServicer):
    """
    gRPC servicer exposing metadata about the connectors known to ``ConnectorRegistry``.
    """

    async def GetConnector(self, request: GetConnectorRequest, context: grpc.ServicerContext) -> GetConnectorResponse:
        """
        Return the metadata of the connector named in the request.
        """
        metadata = ConnectorRegistry._get_connector_metadata(request.name)
        return GetConnectorResponse(connector=metadata)

    async def ListConnectors(
        self, request: ListConnectorsRequest, context: grpc.ServicerContext
    ) -> ListConnectorsResponse:
        """
        Return the metadata of every registered connector.
        """
        all_connectors = ConnectorRegistry._list_connectors()
        return ListConnectorsResponse(connectors=all_connectors)
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
from concurrent import futures
|
|
3
|
+
from importlib.metadata import entry_points
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
import grpc
|
|
8
|
+
from flyteidl2.connector import service_pb2
|
|
9
|
+
from flyteidl2.connector.service_pb2_grpc import (
|
|
10
|
+
add_AsyncConnectorServiceServicer_to_server,
|
|
11
|
+
add_ConnectorMetadataServiceServicer_to_server,
|
|
12
|
+
)
|
|
13
|
+
from flyteidl2.core.execution_pb2 import TaskExecution
|
|
14
|
+
from flyteidl2.core.tasks_pb2 import TaskTemplate
|
|
15
|
+
from rich.console import Console
|
|
16
|
+
from rich.table import Table
|
|
17
|
+
|
|
18
|
+
import flyte
|
|
19
|
+
from flyte import logger
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def is_terminal_phase(phase: TaskExecution.Phase) -> bool:
    """
    Return True when the phase is one of SUCCEEDED, ABORTED or FAILED.
    """
    terminal_phases = (TaskExecution.SUCCEEDED, TaskExecution.ABORTED, TaskExecution.FAILED)
    return phase in terminal_phases
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def convert_to_flyte_phase(state: str) -> TaskExecution.Phase:
    """
    Map a connector-reported state string to a flyte task execution phase.

    The comparison is case-insensitive.

    :param state: State name reported by the connector.
    :return: The matching ``TaskExecution`` phase.
    :raises ValueError: If the lower-cased state is not one of the recognized names.
    """
    normalized = state.lower()
    if normalized in ("failed", "timeout", "timedout", "canceled", "cancelled", "skipped"):
        return TaskExecution.FAILED
    if normalized == "internal_error":
        return TaskExecution.RETRYABLE_FAILED
    if normalized in ("done", "succeeded", "success", "completed"):
        return TaskExecution.SUCCEEDED
    if normalized in ("running", "terminating"):
        return TaskExecution.RUNNING
    if normalized == "pending":
        return TaskExecution.INITIALIZING
    raise ValueError(f"Unrecognized state: {normalized}")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
async def _start_grpc_server(
    port: int, prometheus_port: int, worker: int, timeout: int | None, modules: List[str] | None
):
    """
    Load the connectors, start the metrics endpoint and serve the connector gRPC services.

    :param port: Port the gRPC server listens on (insecure, bound to ``[::]``).
    :param prometheus_port: Port passed to the prometheus HTTP server.
    :param worker: ``max_workers`` of the thread pools handed to the gRPC and health servers.
    :param timeout: Passed to ``wait_for_termination``.
    :param modules: Optional extra module names to import while loading connectors.
    :raises ImportError: If the connector server module cannot be imported.
    """
    try:
        from flyte.connectors._server import AsyncConnectorService, ConnectorMetadataService
    except ImportError as import_error:
        raise ImportError(
            "Flyte connector dependencies are not installed."
            " Please install it using `pip install flyteplugins-connector`"
        ) from import_error

    click.secho("🚀 Starting the connector service...")
    _load_connectors(modules)
    _start_http_server(prometheus_port)

    print_metadata()

    executor = futures.ThreadPoolExecutor(max_workers=worker)
    grpc_server = grpc.aio.server(executor)

    # Register both connector services, then the optional health-check servicer.
    add_AsyncConnectorServiceServicer_to_server(AsyncConnectorService(), grpc_server)
    add_ConnectorMetadataServiceServicer_to_server(ConnectorMetadataService(), grpc_server)
    _start_health_check_server(grpc_server, worker)

    grpc_server.add_insecure_port(f"[::]:{port}")
    await grpc_server.start()
    await grpc_server.wait_for_termination(timeout)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _start_http_server(prometheus_port: int):
    """
    Start the prometheus metrics HTTP server on the given port.

    An ImportError is reported on the console in red rather than raised.
    """
    try:
        from prometheus_client import start_http_server as serve_metrics

        click.secho("Starting up the server to expose the prometheus metrics...")
        serve_metrics(prometheus_port)
    except ImportError as import_error:
        click.secho(f"Failed to start the prometheus server with error {import_error}", fg="red")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _start_health_check_server(server: grpc.Server, worker: int):
    """
    Attach a gRPC health servicer to ``server`` and mark every connector service as SERVING.

    An ImportError (for example a missing ``grpc_health`` package) is reported on the console
    in red rather than raised.
    """
    try:
        from grpc_health.v1 import health, health_pb2, health_pb2_grpc

        pool = futures.ThreadPoolExecutor(max_workers=worker)
        servicer = health.HealthServicer(experimental_non_blocking=True, experimental_thread_pool=pool)

        # Every service in the connector proto descriptor, followed by the health service itself.
        service_names = [svc.full_name for svc in service_pb2.DESCRIPTOR.services_by_name.values()]
        service_names.append(health.SERVICE_NAME)
        for service_name in service_names:
            servicer.set(service_name, health_pb2.HealthCheckResponse.SERVING)

        health_pb2_grpc.add_HealthServicer_to_server(servicer, server)

    except ImportError as import_error:
        click.secho(f"Failed to start the health check servicer with error {import_error}", fg="red")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def print_metadata():
    """
    Print a table listing each registered connector and the task categories it supports.
    """
    from flyte.connectors import ConnectorRegistry

    registered = ConnectorRegistry._list_connectors()

    table = Table(title="Connector Metadata")
    table.add_column("Connector Name", style="cyan", no_wrap=True)
    table.add_column("Support Task Types", style="cyan")

    for connector in registered:
        # One "name (version) " entry per category, including the trailing space.
        supported = "".join(
            f"{category.name} ({category.version}) " for category in connector.supported_task_categories
        )
        table.add_row(connector.name, supported)

    Console().print(table)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _load_connectors(modules: List[str] | None):
    """
    Load connectors from the ``flyte.connectors`` entry-point group and from extra modules.

    A failing entry point is logged as a warning and skipped. Failures while importing one of
    ``modules`` are not caught.

    :param modules: Optional module names to import after the entry points.
    """
    for entry_point in entry_points(group="flyte.connectors"):
        try:
            logger.info(f"Loading connector: {entry_point.name}")
            entry_point.load()
        except Exception as load_error:
            logger.warning(f"Failed to load connector '{entry_point.name}' with error: {load_error}")

    for module_name in modules or ():
        importlib.import_module(module_name)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _render_task_template(tt: TaskTemplate, file_prefix: str) -> TaskTemplate:
    """
    Fill in the placeholders of a container task's arguments for a run rooted at ``file_prefix``.

    The template is modified in place and also returned. Templates without a container are
    returned untouched.

    :param tt: Task template whose ``container.args`` are rendered.
    :param file_prefix: Prefix used to build the input, output and checkpoint paths.
    :return: The same ``tt`` object.
    """
    # Reading an unset protobuf message field yields an empty default message, never ``None``,
    # so presence has to be checked with HasField. Without this, the slice assignments below
    # would populate ``container`` on templates that do not define one.
    if not tt.HasField("container"):
        return tt

    ctx = flyte.ctx()
    # Snapshot the arguments so rendering does not depend on reading back from the live field.
    current_args = list(tt.container.args)
    if current_args:
        substitutions = (
            ("{{.input}}", f"{file_prefix}/inputs.pb"),
            ("{{.outputPrefix}}", f"{file_prefix}"),
            ("{{.rawOutputDataPrefix}}", f"{file_prefix}/raw_output"),
            ("{{.checkpointOutputPrefix}}", f"{file_prefix}/checkpoint_output"),
            ("{{.prevCheckpointPrefix}}", f"{file_prefix}/prev_checkpoint"),
            ("{{.runName}}", ctx.action.run_name if ctx else "test-run"),
            ("{{.actionName}}", "a1"),
        )
        for index, arg in enumerate(current_args):
            # Apply every substitution to the same string, in the listed order.
            for placeholder, value in substitutions:
                arg = arg.replace(placeholder, value)
            tt.container.args[index] = arg

    # Add additional required args
    # Each pair is inserted at index 1, so the pair inserted last ends up closest to args[0].
    extra_args = (
        ("--run-base-dir", f"{file_prefix}/base_dir"),
        ("--org", "test-org"),
        ("--project", "test-project"),
        ("--domain", "test-domain"),
    )
    for flag, value in extra_args:
        tt.container.args[1:1] = [flag, value]
    return tt
|
flyte/errors.py
CHANGED
|
@@ -10,6 +10,16 @@ from typing import Literal
|
|
|
10
10
|
ErrorKind = Literal["system", "unknown", "user"]
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
def silence_grpc_polling_error(loop, context):
    """
    Event-loop exception handler that drops ``BlockingIOError`` and hands every other
    context to the loop's default exception handler.
    """
    if not isinstance(context.get("exception"), BlockingIOError):
        loop.default_exception_handler(context)
|
|
21
|
+
|
|
22
|
+
|
|
13
23
|
class BaseRuntimeError(RuntimeError):
|
|
14
24
|
"""
|
|
15
25
|
Base class for all Union runtime errors. These errors are raised when the underlying task execution fails, either
|
|
@@ -86,6 +96,9 @@ class TaskTimeoutError(RuntimeUserError):
|
|
|
86
96
|
This error is raised when the underlying task execution runs for longer than the specified timeout.
|
|
87
97
|
"""
|
|
88
98
|
|
|
99
|
+
def __init__(self, message: str):
|
|
100
|
+
super().__init__("TaskTimeoutError", message, "user")
|
|
101
|
+
|
|
89
102
|
|
|
90
103
|
class RetriesExhaustedError(RuntimeUserError):
|
|
91
104
|
"""
|
|
@@ -119,7 +132,9 @@ class CustomError(RuntimeUserError):
|
|
|
119
132
|
Create a CustomError from an exception. The exception's class name is used as the error code and the exception
|
|
120
133
|
message is used as the error message.
|
|
121
134
|
"""
|
|
122
|
-
|
|
135
|
+
new_exc = cls(e.__class__.__name__, str(e))
|
|
136
|
+
new_exc.__cause__ = e
|
|
137
|
+
return new_exc
|
|
123
138
|
|
|
124
139
|
|
|
125
140
|
class NotInTaskContextError(RuntimeUserError):
|
|
@@ -134,10 +149,127 @@ class ActionNotFoundError(RuntimeError):
|
|
|
134
149
|
"""
|
|
135
150
|
|
|
136
151
|
|
|
152
|
+
# NOTE: Use RemoteTaskError instead, since "reference tasks" (from v1) are now
|
|
153
|
+
# simply "remote tasks" in v2.
|
|
137
154
|
class ReferenceTaskError(RuntimeUserError):
    """
    Raised when the user tries to access a task that does not exist.
    """

    # Error code handed to the parent constructor; read through ``self`` so subclasses can override it.
    CODE = "ReferenceTaskUsageError"

    def __init__(self, message: str) -> None:
        # NOTE(review): "user" is passed as the third positional argument; confirm it matches
        # the parent constructor's parameter order.
        super().__init__(self.CODE, message, "user")
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class RemoteTaskError(ReferenceTaskError):
    """
    Raised when the user tries to access a remote task that does not exist.

    Uses the constructor of ``ReferenceTaskError`` and only replaces the error code.
    """

    CODE = "RemoteTaskUsageError"
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class LogsNotYetAvailableError(BaseRuntimeError):
    """
    Raised when the logs of a task are not available yet.
    """

    def __init__(self, message: str) -> None:
        # Positional arguments: code, "system", the message, and None as the last argument.
        super().__init__("LogsNotYetAvailable", "system", message, None)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class RuntimeDataValidationError(RuntimeUserError):
    """
    Raised when a task variable cannot be serialized or deserialized.
    """

    def __init__(self, var: str, e: Exception | str, task_name: str = "") -> None:
        """
        :param var: Name of the variable that failed.
        :param e: The underlying error, or a description of it.
        :param task_name: Name of the task the variable belongs to; empty by default.
        """
        details = f"In task {task_name} variable {var}, failed to serialize/deserialize because of {e}"
        super().__init__("DataValidationError", details)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class DeploymentError(RuntimeUserError):
    """
    Raised when deploying a task fails, or when a precondition for deployment is not met.
    """

    def __init__(self, message: str) -> None:
        # NOTE(review): "user" is passed as the third positional argument; confirm it matches
        # the parent constructor's parameter order.
        super().__init__("DeploymentError", message, "user")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class ImageBuildError(RuntimeUserError):
    """
    Raised when building an image fails.
    """

    def __init__(self, message: str) -> None:
        # NOTE(review): "user" is passed as the third positional argument; confirm it matches
        # the parent constructor's parameter order.
        super().__init__("ImageBuildError", message, "user")
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class ModuleLoadError(RuntimeUserError):
    """
    Raised when a module cannot be loaded, either because it does not exist or because it
    contains a syntax error.
    """

    def __init__(self, message: str) -> None:
        # NOTE(review): "user" is passed as the third positional argument; confirm it matches
        # the parent constructor's parameter order.
        super().__init__("ModuleLoadError", message, "user")
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class InlineIOMaxBytesBreached(RuntimeUserError):
    """
    Raised when the inline IO max bytes limit is breached.

    The limit can be adjusted per task by setting max_inline_io_bytes in the task definition.
    """

    def __init__(self, message: str) -> None:
        # NOTE(review): "user" is passed as the third positional argument; confirm it matches
        # the parent constructor's parameter order.
        super().__init__("InlineIOMaxBytesBreached", message, "user")
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
class RunAbortedError(RuntimeUserError):
    """
    Raised when the run is aborted by the user.
    """

    def __init__(self, message: str) -> None:
        # NOTE(review): "user" is passed as the third positional argument; confirm it matches
        # the parent constructor's parameter order.
        super().__init__("RunAbortedError", message, "user")
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class SlowDownError(RuntimeUserError):
    """
    Runtime user error carrying the code ``SlowDownError``.

    NOTE(review): the earlier description ("raised when the user tries to access a resource
    that does not exist or is invalid") looks copied from another error class. The name
    suggests a request to slow down / back off -- confirm the intended meaning.
    """

    def __init__(self, message: str) -> None:
        # NOTE(review): "user" is passed as the third positional argument; confirm it matches
        # the parent constructor's parameter order.
        super().__init__("SlowDownError", message, "user")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
class OnlyAsyncIOSupportedError(RuntimeUserError):
    """
    Raised when the user tries to use sync IO in an async task.
    """

    def __init__(self, message: str) -> None:
        # NOTE(review): "user" is passed as the third positional argument; confirm it matches
        # the parent constructor's parameter order.
        super().__init__("OnlyAsyncIOSupportedError", message, "user")
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
class ParameterMaterializationError(RuntimeUserError):
    """
    Raised when an App uses a Parameter with delayed materialization and that
    materialization fails.
    """

    def __init__(self, message: str) -> None:
        # NOTE(review): "user" is passed as the third positional argument; confirm it matches
        # the parent constructor's parameter order.
        super().__init__("ParameterMaterializationError", message, "user")
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
class RestrictedTypeError(RuntimeUserError):
|
|
269
|
+
"""
|
|
270
|
+
This error is raised when the user uses a restricted type, for example, currently a Tuple is not supported for one
|
|
271
|
+
value.
|
|
272
|
+
"""
|
|
273
|
+
|
|
142
274
|
def __init__(self, message: str):
|
|
143
|
-
super().__init__("
|
|
275
|
+
super().__init__("RestrictedTypeUsage", message, "user")
|
flyte/extend.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from flyte._image import Architecture
|
|
2
|
+
|
|
3
|
+
from ._initialize import is_initialized
|
|
4
|
+
from ._internal.imagebuild.image_builder import ImageBuildEngine, ImageBuilder, ImageChecker
|
|
5
|
+
from ._internal.runtime.entrypoints import download_code_bundle
|
|
6
|
+
from ._internal.runtime.resources_serde import get_proto_resources
|
|
7
|
+
from ._resources import PRIMARY_CONTAINER_DEFAULT_NAME, pod_spec_from_resources
|
|
8
|
+
from ._task import AsyncFunctionTaskTemplate, TaskTemplate
|
|
9
|
+
from ._task_plugins import TaskPluginRegistry
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"PRIMARY_CONTAINER_DEFAULT_NAME",
|
|
13
|
+
"Architecture",
|
|
14
|
+
"AsyncFunctionTaskTemplate",
|
|
15
|
+
"ImageBuildEngine",
|
|
16
|
+
"ImageBuilder",
|
|
17
|
+
"ImageChecker",
|
|
18
|
+
"TaskPluginRegistry",
|
|
19
|
+
"TaskTemplate",
|
|
20
|
+
"download_code_bundle",
|
|
21
|
+
"get_proto_resources",
|
|
22
|
+
"is_initialized",
|
|
23
|
+
"pod_spec_from_resources",
|
|
24
|
+
]
|