flyte 0.0.1b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic. Click here for more details.
- flyte/__init__.py +62 -0
- flyte/_api_commons.py +3 -0
- flyte/_bin/__init__.py +0 -0
- flyte/_bin/runtime.py +126 -0
- flyte/_build.py +25 -0
- flyte/_cache/__init__.py +12 -0
- flyte/_cache/cache.py +146 -0
- flyte/_cache/defaults.py +9 -0
- flyte/_cache/policy_function_body.py +42 -0
- flyte/_cli/__init__.py +0 -0
- flyte/_cli/_common.py +287 -0
- flyte/_cli/_create.py +42 -0
- flyte/_cli/_delete.py +23 -0
- flyte/_cli/_deploy.py +140 -0
- flyte/_cli/_get.py +235 -0
- flyte/_cli/_run.py +152 -0
- flyte/_cli/main.py +72 -0
- flyte/_code_bundle/__init__.py +8 -0
- flyte/_code_bundle/_ignore.py +113 -0
- flyte/_code_bundle/_packaging.py +187 -0
- flyte/_code_bundle/_utils.py +339 -0
- flyte/_code_bundle/bundle.py +178 -0
- flyte/_context.py +146 -0
- flyte/_datastructures.py +342 -0
- flyte/_deploy.py +202 -0
- flyte/_doc.py +29 -0
- flyte/_docstring.py +32 -0
- flyte/_environment.py +43 -0
- flyte/_group.py +31 -0
- flyte/_hash.py +23 -0
- flyte/_image.py +760 -0
- flyte/_initialize.py +634 -0
- flyte/_interface.py +84 -0
- flyte/_internal/__init__.py +3 -0
- flyte/_internal/controllers/__init__.py +115 -0
- flyte/_internal/controllers/_local_controller.py +118 -0
- flyte/_internal/controllers/_trace.py +40 -0
- flyte/_internal/controllers/pbhash.py +39 -0
- flyte/_internal/controllers/remote/__init__.py +40 -0
- flyte/_internal/controllers/remote/_action.py +141 -0
- flyte/_internal/controllers/remote/_client.py +43 -0
- flyte/_internal/controllers/remote/_controller.py +361 -0
- flyte/_internal/controllers/remote/_core.py +402 -0
- flyte/_internal/controllers/remote/_informer.py +361 -0
- flyte/_internal/controllers/remote/_service_protocol.py +50 -0
- flyte/_internal/imagebuild/__init__.py +11 -0
- flyte/_internal/imagebuild/docker_builder.py +416 -0
- flyte/_internal/imagebuild/image_builder.py +241 -0
- flyte/_internal/imagebuild/remote_builder.py +0 -0
- flyte/_internal/resolvers/__init__.py +0 -0
- flyte/_internal/resolvers/_task_module.py +54 -0
- flyte/_internal/resolvers/common.py +31 -0
- flyte/_internal/resolvers/default.py +28 -0
- flyte/_internal/runtime/__init__.py +0 -0
- flyte/_internal/runtime/convert.py +199 -0
- flyte/_internal/runtime/entrypoints.py +135 -0
- flyte/_internal/runtime/io.py +136 -0
- flyte/_internal/runtime/resources_serde.py +138 -0
- flyte/_internal/runtime/task_serde.py +210 -0
- flyte/_internal/runtime/taskrunner.py +190 -0
- flyte/_internal/runtime/types_serde.py +54 -0
- flyte/_logging.py +124 -0
- flyte/_protos/__init__.py +0 -0
- flyte/_protos/common/authorization_pb2.py +66 -0
- flyte/_protos/common/authorization_pb2.pyi +108 -0
- flyte/_protos/common/authorization_pb2_grpc.py +4 -0
- flyte/_protos/common/identifier_pb2.py +71 -0
- flyte/_protos/common/identifier_pb2.pyi +82 -0
- flyte/_protos/common/identifier_pb2_grpc.py +4 -0
- flyte/_protos/common/identity_pb2.py +48 -0
- flyte/_protos/common/identity_pb2.pyi +72 -0
- flyte/_protos/common/identity_pb2_grpc.py +4 -0
- flyte/_protos/common/list_pb2.py +36 -0
- flyte/_protos/common/list_pb2.pyi +69 -0
- flyte/_protos/common/list_pb2_grpc.py +4 -0
- flyte/_protos/common/policy_pb2.py +37 -0
- flyte/_protos/common/policy_pb2.pyi +27 -0
- flyte/_protos/common/policy_pb2_grpc.py +4 -0
- flyte/_protos/common/role_pb2.py +37 -0
- flyte/_protos/common/role_pb2.pyi +53 -0
- flyte/_protos/common/role_pb2_grpc.py +4 -0
- flyte/_protos/common/runtime_version_pb2.py +28 -0
- flyte/_protos/common/runtime_version_pb2.pyi +24 -0
- flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
- flyte/_protos/logs/dataplane/payload_pb2.py +96 -0
- flyte/_protos/logs/dataplane/payload_pb2.pyi +168 -0
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/definition_pb2.py +49 -0
- flyte/_protos/secret/definition_pb2.pyi +93 -0
- flyte/_protos/secret/definition_pb2_grpc.py +4 -0
- flyte/_protos/secret/payload_pb2.py +62 -0
- flyte/_protos/secret/payload_pb2.pyi +94 -0
- flyte/_protos/secret/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/secret_pb2.py +38 -0
- flyte/_protos/secret/secret_pb2.pyi +6 -0
- flyte/_protos/secret/secret_pb2_grpc.py +198 -0
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
- flyte/_protos/validate/validate/validate_pb2.py +76 -0
- flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
- flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- flyte/_protos/workflow/queue_service_pb2.py +106 -0
- flyte/_protos/workflow/queue_service_pb2.pyi +141 -0
- flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- flyte/_protos/workflow/run_definition_pb2.py +128 -0
- flyte/_protos/workflow/run_definition_pb2.pyi +310 -0
- flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
- flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- flyte/_protos/workflow/run_service_pb2.py +133 -0
- flyte/_protos/workflow/run_service_pb2.pyi +175 -0
- flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
- flyte/_protos/workflow/state_service_pb2.py +58 -0
- flyte/_protos/workflow/state_service_pb2.pyi +71 -0
- flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
- flyte/_protos/workflow/task_definition_pb2.py +72 -0
- flyte/_protos/workflow/task_definition_pb2.pyi +65 -0
- flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/task_service_pb2.py +44 -0
- flyte/_protos/workflow/task_service_pb2.pyi +31 -0
- flyte/_protos/workflow/task_service_pb2_grpc.py +104 -0
- flyte/_resources.py +226 -0
- flyte/_retry.py +32 -0
- flyte/_reusable_environment.py +25 -0
- flyte/_run.py +411 -0
- flyte/_secret.py +61 -0
- flyte/_task.py +367 -0
- flyte/_task_environment.py +200 -0
- flyte/_timeout.py +47 -0
- flyte/_tools.py +27 -0
- flyte/_trace.py +128 -0
- flyte/_utils/__init__.py +20 -0
- flyte/_utils/asyn.py +119 -0
- flyte/_utils/coro_management.py +25 -0
- flyte/_utils/file_handling.py +72 -0
- flyte/_utils/helpers.py +108 -0
- flyte/_utils/lazy_module.py +54 -0
- flyte/_utils/uv_script_parser.py +49 -0
- flyte/_version.py +21 -0
- flyte/connectors/__init__.py +0 -0
- flyte/errors.py +143 -0
- flyte/extras/__init__.py +5 -0
- flyte/extras/_container.py +273 -0
- flyte/io/__init__.py +11 -0
- flyte/io/_dataframe.py +0 -0
- flyte/io/_dir.py +448 -0
- flyte/io/_file.py +468 -0
- flyte/io/pickle/__init__.py +0 -0
- flyte/io/pickle/transformer.py +117 -0
- flyte/io/structured_dataset/__init__.py +129 -0
- flyte/io/structured_dataset/basic_dfs.py +219 -0
- flyte/io/structured_dataset/structured_dataset.py +1061 -0
- flyte/py.typed +0 -0
- flyte/remote/__init__.py +25 -0
- flyte/remote/_client/__init__.py +0 -0
- flyte/remote/_client/_protocols.py +131 -0
- flyte/remote/_client/auth/__init__.py +12 -0
- flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
- flyte/remote/_client/auth/_authenticators/base.py +397 -0
- flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
- flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
- flyte/remote/_client/auth/_authenticators/factory.py +200 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
- flyte/remote/_client/auth/_channel.py +184 -0
- flyte/remote/_client/auth/_client_config.py +83 -0
- flyte/remote/_client/auth/_default_html.py +32 -0
- flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
- flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
- flyte/remote/_client/auth/_keyring.py +143 -0
- flyte/remote/_client/auth/_token_client.py +260 -0
- flyte/remote/_client/auth/errors.py +16 -0
- flyte/remote/_client/controlplane.py +95 -0
- flyte/remote/_console.py +18 -0
- flyte/remote/_data.py +155 -0
- flyte/remote/_logs.py +116 -0
- flyte/remote/_project.py +86 -0
- flyte/remote/_run.py +873 -0
- flyte/remote/_secret.py +132 -0
- flyte/remote/_task.py +227 -0
- flyte/report/__init__.py +3 -0
- flyte/report/_report.py +178 -0
- flyte/report/_template.html +124 -0
- flyte/storage/__init__.py +24 -0
- flyte/storage/_remote_fs.py +34 -0
- flyte/storage/_storage.py +251 -0
- flyte/storage/_utils.py +5 -0
- flyte/types/__init__.py +13 -0
- flyte/types/_interface.py +25 -0
- flyte/types/_renderer.py +162 -0
- flyte/types/_string_literals.py +120 -0
- flyte/types/_type_engine.py +2210 -0
- flyte/types/_utils.py +80 -0
- flyte-0.0.1b0.dist-info/METADATA +179 -0
- flyte-0.0.1b0.dist-info/RECORD +390 -0
- flyte-0.0.1b0.dist-info/WHEEL +5 -0
- flyte-0.0.1b0.dist-info/entry_points.txt +3 -0
- flyte-0.0.1b0.dist-info/top_level.txt +1 -0
- union/__init__.py +54 -0
- union/_api_commons.py +3 -0
- union/_bin/__init__.py +0 -0
- union/_bin/runtime.py +113 -0
- union/_build.py +25 -0
- union/_cache/__init__.py +12 -0
- union/_cache/cache.py +141 -0
- union/_cache/defaults.py +9 -0
- union/_cache/policy_function_body.py +42 -0
- union/_cli/__init__.py +0 -0
- union/_cli/_common.py +263 -0
- union/_cli/_create.py +40 -0
- union/_cli/_delete.py +23 -0
- union/_cli/_deploy.py +120 -0
- union/_cli/_get.py +162 -0
- union/_cli/_params.py +579 -0
- union/_cli/_run.py +150 -0
- union/_cli/main.py +72 -0
- union/_code_bundle/__init__.py +8 -0
- union/_code_bundle/_ignore.py +113 -0
- union/_code_bundle/_packaging.py +187 -0
- union/_code_bundle/_utils.py +342 -0
- union/_code_bundle/bundle.py +176 -0
- union/_context.py +146 -0
- union/_datastructures.py +295 -0
- union/_deploy.py +185 -0
- union/_doc.py +29 -0
- union/_docstring.py +26 -0
- union/_environment.py +43 -0
- union/_group.py +31 -0
- union/_hash.py +23 -0
- union/_image.py +760 -0
- union/_initialize.py +585 -0
- union/_interface.py +84 -0
- union/_internal/__init__.py +3 -0
- union/_internal/controllers/__init__.py +77 -0
- union/_internal/controllers/_local_controller.py +77 -0
- union/_internal/controllers/pbhash.py +39 -0
- union/_internal/controllers/remote/__init__.py +40 -0
- union/_internal/controllers/remote/_action.py +131 -0
- union/_internal/controllers/remote/_client.py +43 -0
- union/_internal/controllers/remote/_controller.py +169 -0
- union/_internal/controllers/remote/_core.py +341 -0
- union/_internal/controllers/remote/_informer.py +260 -0
- union/_internal/controllers/remote/_service_protocol.py +44 -0
- union/_internal/imagebuild/__init__.py +11 -0
- union/_internal/imagebuild/docker_builder.py +416 -0
- union/_internal/imagebuild/image_builder.py +243 -0
- union/_internal/imagebuild/remote_builder.py +0 -0
- union/_internal/resolvers/__init__.py +0 -0
- union/_internal/resolvers/_task_module.py +31 -0
- union/_internal/resolvers/common.py +24 -0
- union/_internal/resolvers/default.py +27 -0
- union/_internal/runtime/__init__.py +0 -0
- union/_internal/runtime/convert.py +163 -0
- union/_internal/runtime/entrypoints.py +121 -0
- union/_internal/runtime/io.py +136 -0
- union/_internal/runtime/resources_serde.py +134 -0
- union/_internal/runtime/task_serde.py +202 -0
- union/_internal/runtime/taskrunner.py +179 -0
- union/_internal/runtime/types_serde.py +53 -0
- union/_logging.py +124 -0
- union/_protos/__init__.py +0 -0
- union/_protos/common/authorization_pb2.py +66 -0
- union/_protos/common/authorization_pb2.pyi +106 -0
- union/_protos/common/authorization_pb2_grpc.py +4 -0
- union/_protos/common/identifier_pb2.py +71 -0
- union/_protos/common/identifier_pb2.pyi +82 -0
- union/_protos/common/identifier_pb2_grpc.py +4 -0
- union/_protos/common/identity_pb2.py +48 -0
- union/_protos/common/identity_pb2.pyi +72 -0
- union/_protos/common/identity_pb2_grpc.py +4 -0
- union/_protos/common/list_pb2.py +36 -0
- union/_protos/common/list_pb2.pyi +69 -0
- union/_protos/common/list_pb2_grpc.py +4 -0
- union/_protos/common/policy_pb2.py +37 -0
- union/_protos/common/policy_pb2.pyi +27 -0
- union/_protos/common/policy_pb2_grpc.py +4 -0
- union/_protos/common/role_pb2.py +37 -0
- union/_protos/common/role_pb2.pyi +51 -0
- union/_protos/common/role_pb2_grpc.py +4 -0
- union/_protos/common/runtime_version_pb2.py +28 -0
- union/_protos/common/runtime_version_pb2.pyi +24 -0
- union/_protos/common/runtime_version_pb2_grpc.py +4 -0
- union/_protos/logs/dataplane/payload_pb2.py +96 -0
- union/_protos/logs/dataplane/payload_pb2.pyi +168 -0
- union/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- union/_protos/secret/definition_pb2.py +49 -0
- union/_protos/secret/definition_pb2.pyi +93 -0
- union/_protos/secret/definition_pb2_grpc.py +4 -0
- union/_protos/secret/payload_pb2.py +62 -0
- union/_protos/secret/payload_pb2.pyi +94 -0
- union/_protos/secret/payload_pb2_grpc.py +4 -0
- union/_protos/secret/secret_pb2.py +38 -0
- union/_protos/secret/secret_pb2.pyi +6 -0
- union/_protos/secret/secret_pb2_grpc.py +198 -0
- union/_protos/validate/validate/validate_pb2.py +76 -0
- union/_protos/workflow/node_execution_service_pb2.py +26 -0
- union/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- union/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- union/_protos/workflow/queue_service_pb2.py +75 -0
- union/_protos/workflow/queue_service_pb2.pyi +103 -0
- union/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- union/_protos/workflow/run_definition_pb2.py +100 -0
- union/_protos/workflow/run_definition_pb2.pyi +256 -0
- union/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- union/_protos/workflow/run_logs_service_pb2.py +41 -0
- union/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- union/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- union/_protos/workflow/run_service_pb2.py +133 -0
- union/_protos/workflow/run_service_pb2.pyi +173 -0
- union/_protos/workflow/run_service_pb2_grpc.py +412 -0
- union/_protos/workflow/state_service_pb2.py +58 -0
- union/_protos/workflow/state_service_pb2.pyi +69 -0
- union/_protos/workflow/state_service_pb2_grpc.py +138 -0
- union/_protos/workflow/task_definition_pb2.py +72 -0
- union/_protos/workflow/task_definition_pb2.pyi +65 -0
- union/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- union/_protos/workflow/task_service_pb2.py +44 -0
- union/_protos/workflow/task_service_pb2.pyi +31 -0
- union/_protos/workflow/task_service_pb2_grpc.py +104 -0
- union/_resources.py +226 -0
- union/_retry.py +32 -0
- union/_reusable_environment.py +25 -0
- union/_run.py +374 -0
- union/_secret.py +61 -0
- union/_task.py +354 -0
- union/_task_environment.py +186 -0
- union/_timeout.py +47 -0
- union/_tools.py +27 -0
- union/_utils/__init__.py +11 -0
- union/_utils/asyn.py +119 -0
- union/_utils/file_handling.py +71 -0
- union/_utils/helpers.py +46 -0
- union/_utils/lazy_module.py +54 -0
- union/_utils/uv_script_parser.py +49 -0
- union/_version.py +21 -0
- union/connectors/__init__.py +0 -0
- union/errors.py +128 -0
- union/extras/__init__.py +5 -0
- union/extras/_container.py +263 -0
- union/io/__init__.py +11 -0
- union/io/_dataframe.py +0 -0
- union/io/_dir.py +425 -0
- union/io/_file.py +418 -0
- union/io/pickle/__init__.py +0 -0
- union/io/pickle/transformer.py +117 -0
- union/io/structured_dataset/__init__.py +122 -0
- union/io/structured_dataset/basic_dfs.py +219 -0
- union/io/structured_dataset/structured_dataset.py +1057 -0
- union/py.typed +0 -0
- union/remote/__init__.py +23 -0
- union/remote/_client/__init__.py +0 -0
- union/remote/_client/_protocols.py +129 -0
- union/remote/_client/auth/__init__.py +12 -0
- union/remote/_client/auth/_authenticators/__init__.py +0 -0
- union/remote/_client/auth/_authenticators/base.py +391 -0
- union/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- union/remote/_client/auth/_authenticators/device_code.py +120 -0
- union/remote/_client/auth/_authenticators/external_command.py +77 -0
- union/remote/_client/auth/_authenticators/factory.py +200 -0
- union/remote/_client/auth/_authenticators/pkce.py +515 -0
- union/remote/_client/auth/_channel.py +184 -0
- union/remote/_client/auth/_client_config.py +83 -0
- union/remote/_client/auth/_default_html.py +32 -0
- union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- union/remote/_client/auth/_grpc_utils/auth_interceptor.py +204 -0
- union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +144 -0
- union/remote/_client/auth/_keyring.py +154 -0
- union/remote/_client/auth/_token_client.py +258 -0
- union/remote/_client/auth/errors.py +16 -0
- union/remote/_client/controlplane.py +86 -0
- union/remote/_data.py +149 -0
- union/remote/_logs.py +74 -0
- union/remote/_project.py +86 -0
- union/remote/_run.py +820 -0
- union/remote/_secret.py +132 -0
- union/remote/_task.py +193 -0
- union/report/__init__.py +3 -0
- union/report/_report.py +178 -0
- union/report/_template.html +124 -0
- union/storage/__init__.py +24 -0
- union/storage/_remote_fs.py +34 -0
- union/storage/_storage.py +247 -0
- union/storage/_utils.py +5 -0
- union/types/__init__.py +11 -0
- union/types/_renderer.py +162 -0
- union/types/_string_literals.py +120 -0
- union/types/_type_engine.py +2131 -0
- union/types/_utils.py +80 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from asyncio import Queue
|
|
5
|
+
from typing import AsyncIterator, Callable, Dict, Optional, Tuple, cast
|
|
6
|
+
|
|
7
|
+
import grpc.aio
|
|
8
|
+
|
|
9
|
+
from flyte._logging import log, logger
|
|
10
|
+
from flyte._protos.workflow import run_definition_pb2, state_service_pb2
|
|
11
|
+
|
|
12
|
+
from ._action import Action
|
|
13
|
+
from ._service_protocol import StateService
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ActionCache:
    """
    Coroutine-safe store for the sub-actions launched by a single parent action.

    Two dictionaries are kept, both keyed by action name and both guarded by one
    ``asyncio.Lock``: the actions themselves, and one ``asyncio.Event`` per
    submitted action that is set when the action's completion is fired.
    """

    def __init__(self, parent_action_name: str):
        # Name of the parent whose sub-actions are tracked here.
        self._parent_action_name = parent_action_name
        # Serialises every access to the two dictionaries below.
        self._lock = asyncio.Lock()
        # action name -> Action
        self._cache: Dict[str, Action] = {}
        # action name -> completion event (created in ``submit``)
        self._completion_events: Dict[str, asyncio.Event] = {}

    async def has(self, name: str) -> bool:
        """Return True when an action called ``name`` is currently cached."""
        async with self._lock:
            present = name in self._cache
        return present

    async def observe_state(self, state: state_service_pb2.ActionUpdate) -> Action:
        """
        Record an update received from the watch stream.

        The phase and output URI are logged (plus the error for a failed phase),
        then the update is merged into the cached action, or a new action is
        built from it when the name has not been seen before.

        :param state: the action update to record
        :return: the cached action after the update was applied
        """
        logger.info(f"Observing phase {run_definition_pb2.Phase.Name(state.phase)} for {state.action_id.name}")
        if not state.output_uri:
            logger.info(f"{state.action_id.name} has no output URI")
        else:
            logger.info(f"Output URI: {state.output_uri}")
        if state.phase == run_definition_pb2.Phase.PHASE_FAILED:
            logger.error(
                f"Action {state.action_id.name} failed with error (msg):"
                f" [{state.error if state.HasField('error') else None}]"
            )
        async with self._lock:
            key = state.action_id.name
            if key not in self._cache:
                self._cache[key] = Action.from_state(self._parent_action_name, state)
            else:
                self._cache[key].merge_state(state)
            return self._cache[key]

    async def submit(self, action: Action) -> Action:
        """
        Register an action submitted by the parent action.

        If an action with the same name is already cached, the submitted one is
        merged into it; otherwise it is stored as-is. A completion event is
        created for the name if none exists yet.

        :param action: the action being submitted
        :return: the cached action for that name (the pre-existing one on a merge)
        """
        async with self._lock:
            key = action.name
            if key not in self._cache:
                self._cache[key] = action
            else:
                self._cache[key].merge_in_action_from_submit(action)
            if key not in self._completion_events:
                self._completion_events[key] = asyncio.Event()
            return self._cache[key]

    async def get(self, name: str) -> Action | None:
        """Return the cached action called ``name``, or None when absent."""
        async with self._lock:
            return self._cache.get(name)

    async def remove(self, name: str) -> Action | None:
        """Drop the action called ``name`` and return it, or None when absent."""
        async with self._lock:
            return self._cache.pop(name, None)

    async def wait_for_completion(self, name: str) -> bool:
        """
        Block until the completion event of ``name`` is set.

        :return: False immediately when no completion event is registered for
            ``name``; otherwise the result of ``Event.wait()`` once it is set.
        """
        async with self._lock:
            completion = self._completion_events.get(name)
        if completion is None:
            return False
        # Waiting happens outside the lock so that the event can be fired.
        return await completion.wait()

    async def fire_all_completion_events(self):
        """Set every registered completion event, then forget all of them."""
        async with self._lock:
            for completion in self._completion_events.values():
                completion.set()
            self._completion_events.clear()

    async def fire_completion_event(self, name: str):
        """Set the completion event of ``name``; do nothing when none is registered."""
        async with self._lock:
            completion = self._completion_events.get(name)
            if completion is not None:
                completion.set()

    async def count_started_pending_terminal_actions(self) -> Tuple[int, int, int]:
        """
        Count the cached actions by state.

        Started: implies they were submitted to queue service
        Pending: implies they are still not submitted to the queue service
        Terminal: implies completed (success, failure, aborted, timedout) actions

        ``is_started()`` is consulted first, so an action reporting both started
        and terminal is counted as started.

        :return: ``(started, pending, terminal)``
        """
        started = pending = terminal = 0
        async with self._lock:
            for action in self._cache.values():
                if action.is_started():
                    started += 1
                    continue
                if action.is_terminal():
                    terminal += 1
                else:
                    pending += 1
        return started, pending, terminal
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class Informer:
    """
    Watcher for the sub-actions of one parent action in the remote state service.

    Every update read from the watch stream is folded into an ``ActionCache``
    and the resulting action is pushed onto the shared queue handed in at
    construction time.
    """

    def __init__(
        self,
        run_id: run_definition_pb2.RunIdentifier,
        parent_action_name: str,
        shared_queue: Queue,
        client: Optional[StateService] = None,
        watch_backoff_interval_sec: float = 1.0,
        watch_conn_timeout_sec: float = 5.0,
    ):
        """
        :param run_id: identifier of the run that owns the parent action
        :param parent_action_name: name of the parent action whose sub-actions are watched
        :param shared_queue: queue onto which every submitted / observed action is put
        :param client: state service client used to open the watch stream
        :param watch_backoff_interval_sec: pause between two iterations of the watch loop
        :param watch_conn_timeout_sec: stored on the instance; not read by this class
        """
        self._run_id = run_id
        self._client = client
        self._shared_queue = shared_queue
        self.parent_action_name = parent_action_name
        self.name = self.mkname(run_name=run_id.name, parent_action_name=parent_action_name)
        self._action_cache = ActionCache(parent_action_name)
        self._watch_backoff_interval_sec = watch_backoff_interval_sec
        self._watch_conn_timeout_sec = watch_conn_timeout_sec
        # Lifecycle state: flag polled by the watch loop, the task running it,
        # and the event set once the sentinel message was received.
        self._running = False
        self._watch_task: asyncio.Task | None = None
        self._ready = asyncio.Event()

    @classmethod
    def mkname(cls, *, run_name: str, parent_action_name: str) -> str:
        """Build the informer name, ``<run_name>.<parent_action_name>``."""
        return f"{run_name}.{parent_action_name}"

    @property
    def watch_task(self) -> asyncio.Task | None:
        """The task running ``watch``, or None when there is none."""
        return self._watch_task

    def is_running(self) -> bool:
        """Return the running flag (set by ``start``, cleared by ``stop``)."""
        return self._running

    async def _set_ready(self):
        """Mark the informer as ready, releasing ``wait_for_cache_sync`` callers."""
        self._ready.set()

    async def wait_for_cache_sync(self, timeout: Optional[float] = None) -> bool:
        """
        Wait until the informer has been marked ready.

        :param timeout: maximum number of seconds to wait; None waits indefinitely
        :return: True once ready, False when the timeout elapsed first
        """
        try:
            await asyncio.wait_for(self._ready.wait(), timeout=timeout)
        except asyncio.TimeoutError:
            logger.error(f"Informer cache sync timed out, for {self.name}")
            return False
        else:
            return True

    async def wait_for_action_completion(self, name: str) -> bool:
        """Wait for the completion event of action ``name`` (see ``ActionCache``)."""
        return await self._action_cache.wait_for_completion(name)

    async def fire_completion_event(self, name: str):
        """Fire the completion event of action ``name``."""
        await self._action_cache.fire_completion_event(name)

    @log
    async def submit(self, action: Action):
        """Register ``action`` in the cache and put the cached action on the shared queue."""
        cached = await self._action_cache.submit(action)
        await self._shared_queue.put(cached)

    @log
    async def remove(self, name: str):
        """Remove action ``name`` from the cache."""
        await self._action_cache.remove(name)

    async def get(self, name: str) -> Action | None:
        """Return the cached action called ``name``, or None."""
        return await self._action_cache.get(name)

    async def has(self, name: str) -> bool:
        """Return True when an action called ``name`` is cached."""
        return await self._action_cache.has(name)

    async def watch(self):
        """
        Run the watch loop for as long as the running flag is set.

        Each iteration opens a watch stream for the parent action and consumes
        it: a sentinel control message marks the informer ready, any other
        response is recorded in the action cache and forwarded to the shared
        queue. A failure is logged and counted; receiving a response resets the
        count. Once five failures accumulate without a response in between, the
        last exception is re-raised. Cancellation while consuming the stream
        ends the loop quietly. Iterations are separated by the backoff interval.
        """
        failures = 0
        failure_limit = 5
        last_error = None
        while self._running:
            if failures >= failure_limit:
                logger.error(f"Informer watch failure retries crossed threshold {failures}/{failure_limit}, exiting!")
                raise last_error
            try:
                stream = self._client.Watch(
                    state_service_pb2.WatchRequest(
                        parent_action_id=run_definition_pb2.ActionIdentifier(
                            name=self.parent_action_name,
                            run=self._run_id,
                        ),
                    ),
                    wait_for_ready=True,
                )
                update: state_service_pb2.WatchResponse
                async for update in stream:
                    # Any response proves the stream is healthy again.
                    failures = 0
                    if update.control_message is not None and update.control_message.sentinel:
                        logger.info(f"Received Sentinel, for run {self.name}")
                        await self._set_ready()
                        continue
                    observed = await self._action_cache.observe_state(update.action_update)
                    await self._shared_queue.put(observed)
            except asyncio.CancelledError:
                logger.warning(f"Watch cancelled: {self.name}")
                return
            except asyncio.TimeoutError as exc:
                logger.error(f"Watch timeout: {self.name}", exc_info=exc)
                last_error, failures = exc, failures + 1
            except grpc.aio.AioRpcError as exc:
                logger.exception(f"RPC error: {self.name}", exc_info=exc)
                last_error, failures = exc, failures + 1
            except Exception as exc:
                logger.exception(f"Watch error: {self.name}", exc_info=exc)
                last_error, failures = exc, failures + 1
            await asyncio.sleep(self._watch_backoff_interval_sec)

    @log
    async def start(self, timeout: Optional[float] = None) -> asyncio.Task:
        """
        Start the watch loop in a background task and wait for the cache to sync.

        :param timeout: seconds to wait for readiness; the task is returned even
            when the wait times out
        :return: the watch task (the existing one when already running)
        """
        if self._running:
            logger.warning("Informer already running")
            return cast(asyncio.Task, self._watch_task)
        self._running = True
        watch_task = asyncio.create_task(self.watch())
        self._watch_task = watch_task
        await self.wait_for_cache_sync(timeout=timeout)
        return self._watch_task

    async def count_started_pending_terminal_actions(self) -> Tuple[int, int, int]:
        """Return ``(started, pending, terminal)`` counts from the action cache."""
        return await self._action_cache.count_started_pending_terminal_actions()

    @log
    async def stop(self):
        """Clear the running flag and cancel the watch task, if any."""
        self._running = False
        task, self._watch_task = self._watch_task, None
        if task:
            task.cancel()
        logger.info("Stopped informer")
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
class InformerCache:
    """
    Cache of informers, keyed by ``Informer.mkname(run_name, parent_action_name)``,
    holding the sub-action state for multiple parent actions.

    All access to the underlying dictionary is guarded by one ``asyncio.Lock``.
    """

    def __init__(self):
        # informer name -> Informer
        self._cache: Dict[str, Informer] = {}
        # Guards ``_cache``. asyncio.Lock is not reentrant, so no method may
        # call another locking method of this class while holding it.
        self._lock = asyncio.Lock()

    @log
    async def get_or_create(
        self,
        run_id: run_definition_pb2.RunIdentifier,
        parent_action_name: str,
        shared_queue: Queue,
        state_service: StateService,
        fn: Callable[[asyncio.Task], None],
        timeout: Optional[float] = None,
    ) -> Informer:
        """
        Return the informer for the given run / parent action, creating and
        starting it first when it is not cached yet.

        :param run_id: Run ID
        :param parent_action_name: Parent action name
        :param shared_queue: Shared queue
        :param state_service: State service
        :param fn: Callback attached to the watch task of a newly created informer;
            it is invoked when that task is done. Not used when the informer
            already exists.
        :param timeout: Timeout for the informer to be ready
        :return: The cached or newly started informer.
        :raises RuntimeError: if starting the informer yields no watch task.
        """
        name = Informer.mkname(run_name=run_id.name, parent_action_name=parent_action_name)
        async with self._lock:
            if name in self._cache:
                return self._cache[name]
            informer = Informer(
                run_id=run_id,
                parent_action_name=parent_action_name,
                shared_queue=shared_queue,
                client=state_service,
            )
            self._cache[informer.name] = informer
            # TODO This is a potential perf problem for large number of informers:
            # the informer is started while the cache lock is held, so that a
            # concurrent caller never gets an informer that has not been started
            # and a concurrent remove cannot interleave with the start.
            task = await informer.start(timeout=timeout)
            if task is None:
                logger.error(f"Informer {name} failed to start")
                raise RuntimeError(f"Informer {name} failed to start")
            task.add_done_callback(fn)
            return informer

    @log
    async def get(self, *, run_name: str, parent_action_name: str) -> Informer | None:
        """Return the informer for the given run / parent action, or None."""
        async with self._lock:
            return self._cache.get(Informer.mkname(run_name=run_name, parent_action_name=parent_action_name), None)

    @log
    async def remove(self, *, run_name: str, parent_action_name: str) -> Informer | None:
        """Remove the informer for the given run / parent action and return it (None if absent).

        The informer is not stopped here.
        """
        async with self._lock:
            return self._cache.pop(Informer.mkname(run_name=run_name, parent_action_name=parent_action_name), None)

    async def has(self, *, run_name: str, parent_action_name: str) -> bool:
        """Return True when an informer for the given run / parent action is cached."""
        async with self._lock:
            return Informer.mkname(run_name=run_name, parent_action_name=parent_action_name) in self._cache

    async def count_started_pending_terminal_actions(self) -> AsyncIterator[Tuple[int, int, int]]:
        """
        Yield one ``(started, pending, terminal)`` tuple per cached informer.

        The informers are snapshotted under the lock and the lock is released
        before anything is yielded. Yielding while holding the lock would
        deadlock a consumer that calls back into this cache between items, and
        would keep the lock held if the consumer abandoned the iteration.
        """
        async with self._lock:
            informers = list(self._cache.values())
        for informer in informers:
            yield await informer.count_started_pending_terminal_actions()

    async def remove_and_stop_all(self):
        """Stop all informers and remove them from the cache."""
        async with self._lock:
            # popitem() empties the dictionary as it goes, so nothing is left to
            # clear once the loop ends.
            while self._cache:
                _, informer = self._cache.popitem()
                try:
                    await informer.stop()
                except asyncio.CancelledError:
                    # Best effort: keep stopping the remaining informers.
                    pass
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import AsyncIterator, Protocol
|
|
4
|
+
|
|
5
|
+
from flyte._protos.workflow import queue_service_pb2, state_service_pb2
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class StateService(Protocol):
    """
    Interface for the state store client, which stores the history of all subruns.
    """

    # Declared as a plain ``def`` on purpose: callers use the returned object
    # directly in ``async for`` without awaiting the call. An ``async def``
    # with this return annotation would instead describe a coroutine that
    # resolves to an iterator.
    def Watch(
        self, req: state_service_pb2.WatchRequest, **kwargs
    ) -> AsyncIterator[state_service_pb2.WatchResponse]:
        """
        Watch for subrun updates.

        :param req: the watch request
        :param kwargs: extra call options forwarded to the underlying client
        :return: an asynchronous iterator of watch responses
        """
        ...
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class QueueService(Protocol):
    """
    Structural interface of the remote queue service client, which manages the
    queue of tasks.
    """

    async def EnqueueAction(
        self, req: queue_service_pb2.EnqueueActionRequest, **kwargs
    ) -> queue_service_pb2.EnqueueActionResponse:
        """
        Enqueue an action.

        :param req: the enqueue request
        :param kwargs: extra call options forwarded to the underlying client
        :return: the service's enqueue response
        """
        ...

    async def AbortQueuedAction(
        self, req: queue_service_pb2.AbortQueuedActionRequest, **kwargs
    ) -> queue_service_pb2.AbortQueuedActionResponse:
        """
        Abort an action that was previously queued.

        :param req: the abort request
        :param kwargs: extra call options forwarded to the underlying client
        :return: the service's abort response
        """
        ...
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ClientSet(Protocol):
    """
    Structural interface of the remote client set: an object exposing both the
    state service client and the queue service client.
    """

    @property
    def state_service(self: ClientSet) -> StateService:
        """The state service client."""
        ...

    @property
    def queue_service(self: ClientSet) -> QueueService:
        """The queue service client."""
        ...
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from flyte._image import Image
|
|
5
|
+
from flyte._internal.imagebuild.docker_builder import DockerImageBuilder
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
async def build(images: List[Image]) -> List[str]:
    """
    Build all given images concurrently with a single ``DockerImageBuilder``.

    One task is created per image and all of them are awaited together.

    :param images: the images to build
    :return: the results of ``build_image``, in the same order as ``images``
    """
    docker_builder = DockerImageBuilder()
    pending = []
    for img in images:
        pending.append(asyncio.create_task(docker_builder.build_image(img)))
    built = await asyncio.gather(*pending)
    return list(built)
|