flyte 0.0.1b3__py3-none-any.whl → 0.2.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic. Click here for more details.
- flyte/__init__.py +20 -4
- flyte/_bin/runtime.py +33 -7
- flyte/_build.py +3 -2
- flyte/_cache/cache.py +1 -2
- flyte/_code_bundle/_packaging.py +1 -1
- flyte/_code_bundle/_utils.py +0 -16
- flyte/_code_bundle/bundle.py +43 -12
- flyte/_context.py +8 -2
- flyte/_deploy.py +56 -15
- flyte/_environment.py +45 -4
- flyte/_excepthook.py +37 -0
- flyte/_group.py +2 -1
- flyte/_image.py +8 -4
- flyte/_initialize.py +112 -254
- flyte/_interface.py +3 -3
- flyte/_internal/controllers/__init__.py +19 -6
- flyte/_internal/controllers/_local_controller.py +83 -8
- flyte/_internal/controllers/_trace.py +2 -1
- flyte/_internal/controllers/remote/__init__.py +27 -7
- flyte/_internal/controllers/remote/_action.py +7 -2
- flyte/_internal/controllers/remote/_client.py +5 -1
- flyte/_internal/controllers/remote/_controller.py +159 -26
- flyte/_internal/controllers/remote/_core.py +13 -5
- flyte/_internal/controllers/remote/_informer.py +4 -4
- flyte/_internal/controllers/remote/_service_protocol.py +6 -6
- flyte/_internal/imagebuild/docker_builder.py +12 -1
- flyte/_internal/imagebuild/image_builder.py +16 -11
- flyte/_internal/runtime/convert.py +164 -21
- flyte/_internal/runtime/entrypoints.py +1 -1
- flyte/_internal/runtime/io.py +3 -3
- flyte/_internal/runtime/task_serde.py +140 -20
- flyte/_internal/runtime/taskrunner.py +4 -3
- flyte/_internal/runtime/types_serde.py +1 -1
- flyte/_logging.py +12 -1
- flyte/_map.py +215 -0
- flyte/_pod.py +19 -0
- flyte/_protos/common/list_pb2.py +3 -3
- flyte/_protos/common/list_pb2.pyi +2 -0
- flyte/_protos/logs/dataplane/payload_pb2.py +28 -24
- flyte/_protos/logs/dataplane/payload_pb2.pyi +11 -2
- flyte/_protos/workflow/common_pb2.py +27 -0
- flyte/_protos/workflow/common_pb2.pyi +14 -0
- flyte/_protos/workflow/environment_pb2.py +29 -0
- flyte/_protos/workflow/environment_pb2.pyi +12 -0
- flyte/_protos/workflow/queue_service_pb2.py +40 -41
- flyte/_protos/workflow/queue_service_pb2.pyi +35 -30
- flyte/_protos/workflow/queue_service_pb2_grpc.py +15 -15
- flyte/_protos/workflow/run_definition_pb2.py +61 -61
- flyte/_protos/workflow/run_definition_pb2.pyi +8 -4
- flyte/_protos/workflow/run_service_pb2.py +20 -24
- flyte/_protos/workflow/run_service_pb2.pyi +2 -6
- flyte/_protos/workflow/state_service_pb2.py +36 -28
- flyte/_protos/workflow/state_service_pb2.pyi +19 -15
- flyte/_protos/workflow/state_service_pb2_grpc.py +28 -28
- flyte/_protos/workflow/task_definition_pb2.py +29 -22
- flyte/_protos/workflow/task_definition_pb2.pyi +21 -5
- flyte/_protos/workflow/task_service_pb2.py +27 -11
- flyte/_protos/workflow/task_service_pb2.pyi +29 -1
- flyte/_protos/workflow/task_service_pb2_grpc.py +34 -0
- flyte/_run.py +166 -95
- flyte/_task.py +110 -28
- flyte/_task_environment.py +55 -72
- flyte/_trace.py +6 -14
- flyte/_utils/__init__.py +6 -0
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +0 -2
- flyte/_utils/helpers.py +45 -19
- flyte/_utils/org_discovery.py +57 -0
- flyte/_version.py +2 -2
- flyte/cli/__init__.py +3 -0
- flyte/cli/_abort.py +28 -0
- flyte/{_cli → cli}/_common.py +73 -23
- flyte/cli/_create.py +145 -0
- flyte/{_cli → cli}/_delete.py +4 -4
- flyte/{_cli → cli}/_deploy.py +26 -14
- flyte/cli/_gen.py +163 -0
- flyte/{_cli → cli}/_get.py +98 -23
- {union/_cli → flyte/cli}/_params.py +106 -147
- flyte/{_cli → cli}/_run.py +99 -20
- flyte/cli/main.py +166 -0
- flyte/config/__init__.py +3 -0
- flyte/config/_config.py +216 -0
- flyte/config/_internal.py +64 -0
- flyte/config/_reader.py +207 -0
- flyte/errors.py +29 -0
- flyte/extras/_container.py +33 -43
- flyte/io/__init__.py +17 -1
- flyte/io/_dir.py +2 -2
- flyte/io/_file.py +3 -4
- flyte/io/{structured_dataset → _structured_dataset}/basic_dfs.py +1 -1
- flyte/io/{structured_dataset → _structured_dataset}/structured_dataset.py +1 -1
- flyte/{_datastructures.py → models.py} +56 -7
- flyte/remote/__init__.py +2 -1
- flyte/remote/_client/_protocols.py +2 -0
- flyte/remote/_client/auth/_auth_utils.py +14 -0
- flyte/remote/_client/auth/_channel.py +34 -3
- flyte/remote/_client/auth/_token_client.py +3 -3
- flyte/remote/_client/controlplane.py +13 -13
- flyte/remote/_console.py +1 -1
- flyte/remote/_data.py +10 -6
- flyte/remote/_logs.py +89 -29
- flyte/remote/_project.py +8 -9
- flyte/remote/_run.py +228 -131
- flyte/remote/_secret.py +12 -12
- flyte/remote/_task.py +179 -15
- flyte/report/_report.py +4 -4
- flyte/storage/__init__.py +5 -0
- flyte/storage/_config.py +233 -0
- flyte/storage/_storage.py +23 -3
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +371 -0
- flyte/types/__init__.py +23 -0
- flyte/types/_interface.py +22 -7
- flyte/{io/pickle/transformer.py → types/_pickle.py} +2 -1
- flyte/types/_type_engine.py +95 -18
- flyte-0.2.0a0.dist-info/METADATA +249 -0
- flyte-0.2.0a0.dist-info/RECORD +218 -0
- {flyte-0.0.1b3.dist-info → flyte-0.2.0a0.dist-info}/entry_points.txt +1 -1
- flyte/_api_commons.py +0 -3
- flyte/_cli/__init__.py +0 -0
- flyte/_cli/_create.py +0 -42
- flyte/_cli/main.py +0 -72
- flyte/_internal/controllers/pbhash.py +0 -39
- flyte/io/_dataframe.py +0 -0
- flyte/io/pickle/__init__.py +0 -0
- flyte-0.0.1b3.dist-info/METADATA +0 -179
- flyte-0.0.1b3.dist-info/RECORD +0 -390
- union/__init__.py +0 -54
- union/_api_commons.py +0 -3
- union/_bin/__init__.py +0 -0
- union/_bin/runtime.py +0 -113
- union/_build.py +0 -25
- union/_cache/__init__.py +0 -12
- union/_cache/cache.py +0 -141
- union/_cache/defaults.py +0 -9
- union/_cache/policy_function_body.py +0 -42
- union/_cli/__init__.py +0 -0
- union/_cli/_common.py +0 -263
- union/_cli/_create.py +0 -40
- union/_cli/_delete.py +0 -23
- union/_cli/_deploy.py +0 -120
- union/_cli/_get.py +0 -162
- union/_cli/_run.py +0 -150
- union/_cli/main.py +0 -72
- union/_code_bundle/__init__.py +0 -8
- union/_code_bundle/_ignore.py +0 -113
- union/_code_bundle/_packaging.py +0 -187
- union/_code_bundle/_utils.py +0 -342
- union/_code_bundle/bundle.py +0 -176
- union/_context.py +0 -146
- union/_datastructures.py +0 -295
- union/_deploy.py +0 -185
- union/_doc.py +0 -29
- union/_docstring.py +0 -26
- union/_environment.py +0 -43
- union/_group.py +0 -31
- union/_hash.py +0 -23
- union/_image.py +0 -760
- union/_initialize.py +0 -585
- union/_interface.py +0 -84
- union/_internal/__init__.py +0 -3
- union/_internal/controllers/__init__.py +0 -77
- union/_internal/controllers/_local_controller.py +0 -77
- union/_internal/controllers/pbhash.py +0 -39
- union/_internal/controllers/remote/__init__.py +0 -40
- union/_internal/controllers/remote/_action.py +0 -131
- union/_internal/controllers/remote/_client.py +0 -43
- union/_internal/controllers/remote/_controller.py +0 -169
- union/_internal/controllers/remote/_core.py +0 -341
- union/_internal/controllers/remote/_informer.py +0 -260
- union/_internal/controllers/remote/_service_protocol.py +0 -44
- union/_internal/imagebuild/__init__.py +0 -11
- union/_internal/imagebuild/docker_builder.py +0 -416
- union/_internal/imagebuild/image_builder.py +0 -243
- union/_internal/imagebuild/remote_builder.py +0 -0
- union/_internal/resolvers/__init__.py +0 -0
- union/_internal/resolvers/_task_module.py +0 -31
- union/_internal/resolvers/common.py +0 -24
- union/_internal/resolvers/default.py +0 -27
- union/_internal/runtime/__init__.py +0 -0
- union/_internal/runtime/convert.py +0 -163
- union/_internal/runtime/entrypoints.py +0 -121
- union/_internal/runtime/io.py +0 -136
- union/_internal/runtime/resources_serde.py +0 -134
- union/_internal/runtime/task_serde.py +0 -202
- union/_internal/runtime/taskrunner.py +0 -179
- union/_internal/runtime/types_serde.py +0 -53
- union/_logging.py +0 -124
- union/_protos/__init__.py +0 -0
- union/_protos/common/authorization_pb2.py +0 -66
- union/_protos/common/authorization_pb2.pyi +0 -106
- union/_protos/common/identifier_pb2.py +0 -71
- union/_protos/common/identifier_pb2.pyi +0 -82
- union/_protos/common/identity_pb2.py +0 -48
- union/_protos/common/identity_pb2.pyi +0 -72
- union/_protos/common/identity_pb2_grpc.py +0 -4
- union/_protos/common/list_pb2.py +0 -36
- union/_protos/common/list_pb2.pyi +0 -69
- union/_protos/common/list_pb2_grpc.py +0 -4
- union/_protos/common/policy_pb2.py +0 -37
- union/_protos/common/policy_pb2.pyi +0 -27
- union/_protos/common/policy_pb2_grpc.py +0 -4
- union/_protos/common/role_pb2.py +0 -37
- union/_protos/common/role_pb2.pyi +0 -51
- union/_protos/common/role_pb2_grpc.py +0 -4
- union/_protos/common/runtime_version_pb2.py +0 -28
- union/_protos/common/runtime_version_pb2.pyi +0 -24
- union/_protos/common/runtime_version_pb2_grpc.py +0 -4
- union/_protos/logs/dataplane/payload_pb2.py +0 -96
- union/_protos/logs/dataplane/payload_pb2.pyi +0 -168
- union/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
- union/_protos/secret/definition_pb2.py +0 -49
- union/_protos/secret/definition_pb2.pyi +0 -93
- union/_protos/secret/definition_pb2_grpc.py +0 -4
- union/_protos/secret/payload_pb2.py +0 -62
- union/_protos/secret/payload_pb2.pyi +0 -94
- union/_protos/secret/payload_pb2_grpc.py +0 -4
- union/_protos/secret/secret_pb2.py +0 -38
- union/_protos/secret/secret_pb2.pyi +0 -6
- union/_protos/secret/secret_pb2_grpc.py +0 -198
- union/_protos/validate/validate/validate_pb2.py +0 -76
- union/_protos/workflow/node_execution_service_pb2.py +0 -26
- union/_protos/workflow/node_execution_service_pb2.pyi +0 -4
- union/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
- union/_protos/workflow/queue_service_pb2.py +0 -75
- union/_protos/workflow/queue_service_pb2.pyi +0 -103
- union/_protos/workflow/queue_service_pb2_grpc.py +0 -172
- union/_protos/workflow/run_definition_pb2.py +0 -100
- union/_protos/workflow/run_definition_pb2.pyi +0 -256
- union/_protos/workflow/run_definition_pb2_grpc.py +0 -4
- union/_protos/workflow/run_logs_service_pb2.py +0 -41
- union/_protos/workflow/run_logs_service_pb2.pyi +0 -28
- union/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
- union/_protos/workflow/run_service_pb2.py +0 -133
- union/_protos/workflow/run_service_pb2.pyi +0 -173
- union/_protos/workflow/run_service_pb2_grpc.py +0 -412
- union/_protos/workflow/state_service_pb2.py +0 -58
- union/_protos/workflow/state_service_pb2.pyi +0 -69
- union/_protos/workflow/state_service_pb2_grpc.py +0 -138
- union/_protos/workflow/task_definition_pb2.py +0 -72
- union/_protos/workflow/task_definition_pb2.pyi +0 -65
- union/_protos/workflow/task_definition_pb2_grpc.py +0 -4
- union/_protos/workflow/task_service_pb2.py +0 -44
- union/_protos/workflow/task_service_pb2.pyi +0 -31
- union/_protos/workflow/task_service_pb2_grpc.py +0 -104
- union/_resources.py +0 -226
- union/_retry.py +0 -32
- union/_reusable_environment.py +0 -25
- union/_run.py +0 -374
- union/_secret.py +0 -61
- union/_task.py +0 -354
- union/_task_environment.py +0 -186
- union/_timeout.py +0 -47
- union/_tools.py +0 -27
- union/_utils/__init__.py +0 -11
- union/_utils/asyn.py +0 -119
- union/_utils/file_handling.py +0 -71
- union/_utils/helpers.py +0 -46
- union/_utils/lazy_module.py +0 -54
- union/_utils/uv_script_parser.py +0 -49
- union/_version.py +0 -21
- union/connectors/__init__.py +0 -0
- union/errors.py +0 -128
- union/extras/__init__.py +0 -5
- union/extras/_container.py +0 -263
- union/io/__init__.py +0 -11
- union/io/_dataframe.py +0 -0
- union/io/_dir.py +0 -425
- union/io/_file.py +0 -418
- union/io/pickle/__init__.py +0 -0
- union/io/pickle/transformer.py +0 -117
- union/io/structured_dataset/__init__.py +0 -122
- union/io/structured_dataset/basic_dfs.py +0 -219
- union/io/structured_dataset/structured_dataset.py +0 -1057
- union/py.typed +0 -0
- union/remote/__init__.py +0 -23
- union/remote/_client/__init__.py +0 -0
- union/remote/_client/_protocols.py +0 -129
- union/remote/_client/auth/__init__.py +0 -12
- union/remote/_client/auth/_authenticators/__init__.py +0 -0
- union/remote/_client/auth/_authenticators/base.py +0 -391
- union/remote/_client/auth/_authenticators/client_credentials.py +0 -73
- union/remote/_client/auth/_authenticators/device_code.py +0 -120
- union/remote/_client/auth/_authenticators/external_command.py +0 -77
- union/remote/_client/auth/_authenticators/factory.py +0 -200
- union/remote/_client/auth/_authenticators/pkce.py +0 -515
- union/remote/_client/auth/_channel.py +0 -184
- union/remote/_client/auth/_client_config.py +0 -83
- union/remote/_client/auth/_default_html.py +0 -32
- union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- union/remote/_client/auth/_grpc_utils/auth_interceptor.py +0 -204
- union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +0 -144
- union/remote/_client/auth/_keyring.py +0 -154
- union/remote/_client/auth/_token_client.py +0 -258
- union/remote/_client/auth/errors.py +0 -16
- union/remote/_client/controlplane.py +0 -86
- union/remote/_data.py +0 -149
- union/remote/_logs.py +0 -74
- union/remote/_project.py +0 -86
- union/remote/_run.py +0 -820
- union/remote/_secret.py +0 -132
- union/remote/_task.py +0 -193
- union/report/__init__.py +0 -3
- union/report/_report.py +0 -178
- union/report/_template.html +0 -124
- union/storage/__init__.py +0 -24
- union/storage/_remote_fs.py +0 -34
- union/storage/_storage.py +0 -247
- union/storage/_utils.py +0 -5
- union/types/__init__.py +0 -11
- union/types/_renderer.py +0 -162
- union/types/_string_literals.py +0 -120
- union/types/_type_engine.py +0 -2131
- union/types/_utils.py +0 -80
- /union/_protos/common/authorization_pb2_grpc.py → /flyte/_protos/workflow/common_pb2_grpc.py +0 -0
- /union/_protos/common/identifier_pb2_grpc.py → /flyte/_protos/workflow/environment_pb2_grpc.py +0 -0
- /flyte/io/{structured_dataset → _structured_dataset}/__init__.py +0 -0
- {flyte-0.0.1b3.dist-info → flyte-0.2.0a0.dist-info}/WHEEL +0 -0
- {flyte-0.0.1b3.dist-info → flyte-0.2.0a0.dist-info}/top_level.txt +0 -0
|
@@ -1,341 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import sys
|
|
5
|
-
import threading
|
|
6
|
-
from asyncio import Event
|
|
7
|
-
from typing import Awaitable, Coroutine, Dict, Optional
|
|
8
|
-
|
|
9
|
-
import grpc.aio
|
|
10
|
-
|
|
11
|
-
import union.errors
|
|
12
|
-
from union._logging import log, logger
|
|
13
|
-
from union._protos.workflow import queue_service_pb2, run_definition_pb2, task_definition_pb2
|
|
14
|
-
from union.errors import RuntimeSystemError
|
|
15
|
-
|
|
16
|
-
from ._action import Action
|
|
17
|
-
from ._informer import Informer, InformerCache
|
|
18
|
-
from ._service_protocol import ClientSet, QueueService, StateService
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class Controller:
    """
    Generic controller with high-level submit API running in a dedicated thread with its own event loop.

    All methods that begin with _bg_ are run in the controller's event loop, and will need to use
    _run_coroutine_in_controller_thread (or _schedule_in_controller_thread for blocking callers)
    to run them in the controller's event loop.
    """

    def __init__(
        self,
        client_coro: Awaitable[ClientSet],
        workers: int = 2,
        max_system_retries: int = 5,
        resource_log_interval_sec: float = 10.0,
        min_backoff_on_err_sec: float = 0.1,
        thread_wait_timeout_sec: float = 10.0,
    ):
        """
        Create a new controller instance and start its background thread.

        :param client_coro: Awaitable resolving to the ClientSet; it is awaited once, on the controller thread.
        :param workers: Number of worker tasks started on the controller's event loop.
        :param max_system_retries: Maximum number of system retries.
        :param resource_log_interval_sec: Interval for logging resource stats.
        :param min_backoff_on_err_sec: Minimum backoff time on error.
        :param thread_wait_timeout_sec: Timeout for waiting for the controller thread to start.
        :raises TimeoutError: If the controller thread does not signal readiness in time.
        :raises RuntimeSystemError: If the controller thread fails while starting up.
        """
        self._informers = InformerCache()
        self._shared_queue = asyncio.Queue(maxsize=10000)
        self._running = False
        self._completion_events: Dict[str, Event] = {}  # Track completion events
        self._resource_log_task = None
        self._workers = workers
        self._max_retries = max_system_retries
        self._resource_log_interval = resource_log_interval_sec
        self._min_backoff_on_err = min_backoff_on_err_sec
        self._thread_wait_timeout = thread_wait_timeout_sec
        self._client_coro = client_coro

        self._initialize_lock: Dict[str, asyncio.Lock] = {}

        # Thread management
        self._thread = None
        self._loop = None
        self._thread_ready = threading.Event()
        self._thread_exception = None
        self._thread_com_lock = threading.Lock()
        self._start()

    # ---------------- Public sync methods, we can add more sync methods if needed
    @log
    def submit_action_sync(self, action: Action) -> Action:
        """
        Synchronous version of submit that runs in the controller's event loop.

        Blocks the calling thread until the action completes. Unlike submit_action, this does not
        initialize the parent action first.

        :param action: The action to submit.
        :return: The final state of the action.
        """
        # Wait on the concurrent future directly: asyncio.Future.result() does not block and raises
        # InvalidStateError while the result is not ready.
        fut = self._schedule_in_controller_thread(self._bg_submit_action(action))
        return fut.result()

    # --------------- Public async methods
    async def _initialize_parent_action(
        self, run_id: run_definition_pb2.RunIdentifier, parent_action_name: str, timeout: Optional[float] = None
    ):
        """
        Make sure an informer exists for the given parent action.

        :param run_id: Identifier of the run the parent action belongs to.
        :param parent_action_name: Name of the parent action.
        :param timeout: Passed through to the informer start.
        """
        name = Informer.mkname(run_name=run_id.name, parent_action_name=parent_action_name)
        if name not in self._initialize_lock:
            self._initialize_lock[name] = asyncio.Lock()
        # We want to limit the coroutines working on this parent run from entering the initialization section,
        # as we just want one informer to be initialized. This is why we take the lock. After the first one,
        # subsequent initializations should be fast.
        async with self._initialize_lock[name]:
            return await self._run_coroutine_in_controller_thread(
                self._bg_create_new_informer_and_wait(run_id, parent_action_name, timeout=timeout)
            )

    @log
    async def submit_action(self, action: Action) -> Action:
        """
        Public API to submit a resource and wait for completion.

        :param action: The action to submit.
        :return: The final state of the action.
        """
        await self._initialize_parent_action(
            run_id=action.action_id.run, parent_action_name=action.parent_action_name, timeout=self._thread_wait_timeout
        )
        return await self._run_coroutine_in_controller_thread(self._bg_submit_action(action))

    async def _finalize_parent_action(
        self, run_id: run_definition_pb2.RunIdentifier, parent_action_name: str, timeout: Optional[float] = None
    ):
        """
        Finalize the parent run: stop and drop its informer and forget its initialization lock.

        Does nothing when the parent action was never initialized through this controller.
        """
        name = Informer.mkname(run_name=run_id.name, parent_action_name=parent_action_name)
        lock = self._initialize_lock.get(name)
        if lock is None:
            return
        async with lock:
            await self._run_coroutine_in_controller_thread(
                self._bg_finalize_informer(run_id=run_id, parent_action_name=parent_action_name, timeout=timeout)
            )
            self._initialize_lock.pop(name, None)

    @log
    def stop(self, timeout: Optional[float] = None):
        """
        Stop the controller.

        :param timeout: Maximum number of seconds to wait for the stop to finish; None waits indefinitely.
        :return: An awaitable (from asyncio.wait_for) that the caller must await.
        """
        return asyncio.wait_for(self._run_coroutine_in_controller_thread(self._bg_stop()), timeout)

    # ------------- Background thread management methods
    def _set_exception(self, exc: Optional[BaseException]):
        """Set exception in the thread lock"""
        with self._thread_com_lock:
            self._thread_exception = exc

    def _get_exception(self) -> Optional[BaseException]:
        """Get exception in the thread lock"""
        with self._thread_com_lock:
            return self._thread_exception

    @log
    def _start(self):
        """
        Start the controller in a separate thread and wait until it reports readiness.

        :raises TimeoutError: If the thread does not signal readiness within the configured timeout.
        :raises RuntimeSystemError: If the thread recorded an exception while starting.
        """
        if self._thread and self._thread.is_alive():
            logger.warning("Controller thread is already running")
            return

        self._thread_ready.clear()
        self._set_exception(None)
        self._thread = threading.Thread(target=self._bg_thread_target, daemon=True, name="ControllerThread")
        self._thread.start()

        # Wait for the thread to be ready
        logger.info("Waiting for controller thread to be ready...")
        if not self._thread_ready.wait(timeout=self._thread_wait_timeout):
            raise TimeoutError("Controller thread failed to start in time")

        # Read the recorded exception once so the check and the message agree.
        startup_error = self._get_exception()
        if startup_error is not None:
            raise RuntimeSystemError(
                type(startup_error).__name__, f"Controller thread startup failed: {startup_error}"
            )

        logger.info(f"Controller started in thread: {self._thread.name}")

    def _schedule_in_controller_thread(self, coro: Coroutine):
        """
        Schedule a coroutine on the controller's event loop.

        :param coro: Coroutine to run on the controller thread.
        :return: The concurrent.futures.Future returned by asyncio.run_coroutine_threadsafe; its
            result() blocks the calling thread.
        :raises RuntimeError: If the controller thread or its loop is not available.
        """
        # Snapshot both references under the lock and use only the snapshots afterwards.
        with self._thread_com_lock:
            loop = self._loop
            thread = self._thread
        if not loop or not thread or not thread.is_alive():
            raise RuntimeError("Controller thread is not running")

        assert thread.name != threading.current_thread().name, "Cannot run coroutine in the same thread"

        return asyncio.run_coroutine_threadsafe(coro, loop)

    def _run_coroutine_in_controller_thread(self, coro: Coroutine) -> asyncio.Future:
        """
        Run a coroutine in the controller's event loop and return an awaitable for its result.

        :param coro: Coroutine to run on the controller thread.
        :return: An asyncio future wrapping the scheduled work; await it from the caller's loop.
        :raises RuntimeError: If the controller thread or its loop is not available.
        """
        return asyncio.wrap_future(self._schedule_in_controller_thread(coro))

    # ------------- Private methods that run on the background thread
    async def _bg_worker_pool(self):
        """Resolve the service clients, signal readiness, then run the worker tasks until they finish."""
        logger.debug("Starting controller worker pool")
        self._running = True
        logger.debug("Waiting for Service Client to be ready")
        client_set = await self._client_coro
        self._state_service: StateService = client_set.state_service
        self._queue_service: QueueService = client_set.queue_service
        self._resource_log_task = asyncio.create_task(self._bg_log_stats())
        # Signal the main thread that we're ready
        logger.debug("Background thread initialization complete")
        self._thread_ready.set()
        if sys.version_info >= (3, 11):
            async with asyncio.TaskGroup() as tg:
                for _ in range(self._workers):
                    tg.create_task(self._bg_run())
        else:
            tasks = [asyncio.create_task(self._bg_run()) for _ in range(self._workers)]
            await asyncio.gather(*tasks)

    def _bg_thread_target(self):
        """Target function for the controller thread that creates and manages its own event loop"""
        try:
            # Create a new event loop for this thread
            self._loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self._loop)
            logger.debug(f"Controller thread started with new event loop: {threading.current_thread().name}")

            self._loop.run_until_complete(self._bg_worker_pool())
        except Exception as e:
            logger.error(f"Controller thread encountered an exception: {e}")
            self._set_exception(e)
            # Wake up _start so a startup failure is reported right away instead of surfacing
            # as a TimeoutError; the exception is recorded before the event is set.
            self._thread_ready.set()
        finally:
            # run_until_complete has returned or raised, so the loop is no longer running here;
            # close it unless that already happened.
            if self._loop is not None and not self._loop.is_closed():
                self._loop.close()
            logger.debug(f"Controller thread exiting: {threading.current_thread().name}")

    async def _bg_create_new_informer_and_wait(
        self, run_id: run_definition_pb2.RunIdentifier, parent_action_name: str, timeout: Optional[float] = None
    ):
        """Create, start and register an informer for the parent action unless one is already registered."""
        if await self._informers.has(run_name=run_id.name, parent_action_name=parent_action_name):
            return
        informer = Informer(
            run_id=run_id,
            parent_action_name=parent_action_name,
            shared_queue=self._shared_queue,
            client=self._state_service,
        )
        await informer.start(timeout=timeout)
        await self._informers.add(informer)

    async def _bg_finalize_informer(
        self, run_id: run_definition_pb2.RunIdentifier, parent_action_name: str, timeout: Optional[float] = None
    ):
        """Remove the informer of the parent action from the cache and stop it, if there is one."""
        informer = await self._informers.remove(run_name=run_id.name, parent_action_name=parent_action_name)
        if informer:
            await informer.stop()

    @log
    async def _bg_submit_action(self, action: Action) -> Action:
        """
        Submit a resource and await its completion, returning the final state.

        :param action: The action to submit.
        :return: The action as last known to the informer.
        :raises ValueError: If the informer no longer knows the action once it completed.
        """
        logger.debug(f"{threading.current_thread().name} Submitting action {action.name}")
        informer = await self._informers.get(run_name=action.run_name, parent_action_name=action.parent_action_name)
        # Create completion event and add resource. Keep a local reference: _bg_stop clears the
        # shared dict, so looking the event up again later could raise KeyError.
        completion = Event()
        self._completion_events[action.name] = completion
        try:
            await informer.submit(action)

            logger.debug(f"{threading.current_thread().name} Waiting for completion of {action.name}")
            # Wait for completion
            await completion.wait()
            logger.info(f"{threading.current_thread().name} Action {action.name} completed")

            # Get final resource state and clean up
            final_resource = await informer.get(action.name)
            if final_resource is None:
                raise ValueError(f"Action {action.name} not found")
        finally:
            # Always drop our own event (also on failure), but never one registered by another
            # submit of the same action name.
            if self._completion_events.get(action.name) is completion:
                del self._completion_events[action.name]
        logger.debug(f"{threading.current_thread().name} Removed completion event for action {action.name}")
        await informer.remove(action.name)  # TODO we should not remove maybe, we should keep a record of completed?
        logger.debug(f"{threading.current_thread().name} Removed action {action.name}, final={final_resource}")
        return final_resource

    async def _bg_launch(self, action: Action):
        """
        Attempt to launch an action.

        Does nothing if the action is already started. An ALREADY_EXISTS response from the queue
        service is treated as success; any other gRPC error is logged and re-raised.
        """
        if action.is_started():
            return
        logger.debug(f"Attempting to launch action: {action.name}")
        try:
            await self._queue_service.EnqueueAction(
                queue_service_pb2.EnqueueActionRequest(
                    action_id=action.action_id,
                    parent_action_name=action.parent_action_name,
                    task_id=task_definition_pb2.TaskIdentifier(
                        version=action.task.task_template.id.version,
                        org=action.task.task_template.id.org,
                        project=action.task.task_template.id.project,
                        domain=action.task.task_template.id.domain,
                        name=action.task.task_template.id.name,
                    ),
                    task_spec=action.task,
                    input_uri=action.inputs_uri,
                    output_uri=action.outputs_uri,
                    group=action.group,
                    # Subject is not used in the current implementation
                )
            )
            logger.info(f"Successfully launched action: {action.name}")
        except grpc.aio.AioRpcError as e:
            if e.code() == grpc.StatusCode.ALREADY_EXISTS:
                logger.info(f"Action {action.name} already exists, continuing to monitor.")
                return
            logger.exception(f"Failed to launch action: {action.name} backing off...")
            logger.debug(f"Action details: {action}")
            raise

    @log
    async def _bg_process(self, action: Action):
        """Process resource updates: launch unstarted actions and signal completion of terminal ones."""
        logger.debug(f"Processing action: name={action.name}, started={action.is_started()}")

        if not action.is_started():
            await self._bg_launch(action)
        elif action.is_terminal():
            if action.name in self._completion_events:
                # TODO This can conflict, we probably need a completion cache.
                self._completion_events[action.name].set()  # Signal completion
        else:
            logger.debug(f"Resource {action.name} still in progress...")

    async def _bg_log_stats(self):
        """Periodically log resource stats while the controller is running"""
        while self._running:
            async for started, pending, terminal in self._informers.count_started_pending_terminal_actions():
                logger.info(f"Resource stats: Started={started}, Pending={pending}, Terminal={terminal}")
            await asyncio.sleep(self._resource_log_interval)

    @log
    async def _bg_run(self):
        """
        Worker loop: take actions off the shared queue and process them.

        A failing action is retried by putting it back on the queue after a backoff; once its retry
        count exceeds the configured maximum, the error is recorded on the action and its waiter is
        released.
        """
        while self._running:
            logger.debug(f"{threading.current_thread().name} Waiting for resource")
            action = await self._shared_queue.get()
            logger.debug(f"{threading.current_thread().name} Got resource {action.name}")
            try:
                await self._bg_process(action)
            except Exception as e:
                logger.error(f"Error in controller loop: {e}")
                # TODO we need a better way of handling backoffs currently the entire worker coroutine backs off
                await asyncio.sleep(self._min_backoff_on_err)
                action.increment_retries()
                if action.retries > self._max_retries:
                    err = union.errors.RuntimeSystemError(
                        code=type(e).__name__,
                        message=f"Controller failed, system retries {action.retries}"
                        f" crossed threshold {self._max_retries}",
                    )
                    err.__cause__ = e
                    action.set_client_error(err)
                    if action.name in self._completion_events:
                        self._completion_events[action.name].set()  # Signal completion
                else:
                    await self._shared_queue.put(action)
            finally:
                self._shared_queue.task_done()

    @log
    async def _bg_stop(self):
        """Stop the controller: release all waiters, cancel the stats task and stop every informer."""
        self._running = False
        for event in self._completion_events.values():
            event.set()  # Unblock any waiting submit calls
        self._completion_events.clear()
        # The stats task only exists once the worker pool has created it.
        if self._resource_log_task is not None:
            self._resource_log_task.cancel()
        await self._informers.remove_and_stop_all()
|
|
@@ -1,260 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
from asyncio import Queue
|
|
5
|
-
from typing import AsyncIterator, Dict, Optional, Tuple
|
|
6
|
-
|
|
7
|
-
import grpc.aio
|
|
8
|
-
|
|
9
|
-
from union._logging import log, logger
|
|
10
|
-
from union._protos.workflow import run_definition_pb2, state_service_pb2
|
|
11
|
-
|
|
12
|
-
from ._action import Action
|
|
13
|
-
from ._service_protocol import StateService
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class ActionCache:
|
|
17
|
-
"""
|
|
18
|
-
Cache for actions, used to store the state of all sub-actions, launched by this parent action.
|
|
19
|
-
This is coroutine-safe.
|
|
20
|
-
"""
|
|
21
|
-
|
|
22
|
-
def __init__(self, parent_action_name: str):
    """
    Set up an empty cache for the sub-actions of one parent action.

    :param parent_action_name: Name of the parent action; stored and handed to Action.from_state
        when observe_state sees an action that is not cached yet.
    """
    self._parent_action_name = parent_action_name
    # Taken by has/observe_state/submit/get/remove around their access to self._cache.
    self._lock = asyncio.Lock()
    # Maps action name to the cached Action.
    self._cache: Dict[str, Action] = {}
|
|
26
|
-
|
|
27
|
-
async def has(self, name: str) -> bool:
    """
    Report whether an action with the given name is in the cache.

    :param name: Action name to look up.
    :return: True if the name is a key of the cache, otherwise False.
    """
    async with self._lock:
        found = name in self._cache
    return found
|
|
31
|
-
|
|
32
|
-
async def observe_state(self, state: state_service_pb2.ActionUpdate) -> Action:
|
|
33
|
-
"""
|
|
34
|
-
Add an action to the cache if it doesn't exist. This is invoked by the watch.
|
|
35
|
-
"""
|
|
36
|
-
logger.debug(f"Observing phase {run_definition_pb2.Phase.Name(state.phase)} for {state.action_id.name}")
|
|
37
|
-
if state.phase == run_definition_pb2.Phase.PHASE_FAILED:
|
|
38
|
-
logger.error(
|
|
39
|
-
f"Action {state.action_id.name} failed with error (msg):"
|
|
40
|
-
f" [{state.error if state.HasField('error') else None}]"
|
|
41
|
-
)
|
|
42
|
-
async with self._lock:
|
|
43
|
-
if state.action_id.name in self._cache:
|
|
44
|
-
self._cache[state.action_id.name].merge_state(state)
|
|
45
|
-
else:
|
|
46
|
-
self._cache[state.action_id.name] = Action.from_state(self._parent_action_name, state)
|
|
47
|
-
return self._cache[state.action_id.name]
|
|
48
|
-
|
|
49
|
-
async def submit(self, action: Action) -> Action:
|
|
50
|
-
"""
|
|
51
|
-
Submit a new Action to the cache. This is invoked by the parent_action.
|
|
52
|
-
"""
|
|
53
|
-
async with self._lock:
|
|
54
|
-
if action.name in self._cache:
|
|
55
|
-
self._cache[action.name].merge_in_action_from_submit(action)
|
|
56
|
-
else:
|
|
57
|
-
self._cache[action.name] = action
|
|
58
|
-
return self._cache[action.name]
|
|
59
|
-
|
|
60
|
-
async def get(self, name: str) -> Action | None:
|
|
61
|
-
"""Get an action by its name from the cache"""
|
|
62
|
-
async with self._lock:
|
|
63
|
-
return self._cache.get(name, None)
|
|
64
|
-
|
|
65
|
-
async def remove(self, name: str) -> Action | None:
|
|
66
|
-
"""Remove an action from the cache"""
|
|
67
|
-
async with self._lock:
|
|
68
|
-
return self._cache.pop(name, None)
|
|
69
|
-
|
|
70
|
-
async def count_started_pending_terminal_actions(self) -> Tuple[int, int, int]:
|
|
71
|
-
"""
|
|
72
|
-
Get all started, pending and terminal actions.
|
|
73
|
-
Started: implies they were submitted to queue service
|
|
74
|
-
Pending: implies they are still not submitted to the queue service
|
|
75
|
-
Terminal: implies completed (success, failure, aborted, timedout) actions
|
|
76
|
-
"""
|
|
77
|
-
started = 0
|
|
78
|
-
pending = 0
|
|
79
|
-
terminal = 0
|
|
80
|
-
async with self._lock:
|
|
81
|
-
for name, res in self._cache.items():
|
|
82
|
-
if res.is_started():
|
|
83
|
-
started += 1
|
|
84
|
-
elif res.is_terminal():
|
|
85
|
-
terminal += 1
|
|
86
|
-
else:
|
|
87
|
-
pending += 1
|
|
88
|
-
return started, pending, terminal
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
class Informer:
    """
    Remote StateStore watcher and informer for sub-actions.

    An informer watches the sub-actions of one parent action within one run. Every update
    received from the watch stream is recorded in an ``ActionCache`` and the resulting
    action is pushed onto the shared queue.
    """

    def __init__(
        self,
        run_id: run_definition_pb2.RunIdentifier,
        parent_action_name: str,
        shared_queue: Queue,
        client: Optional[StateService] = None,
        watch_backoff_interval_sec: float = 1.0,
    ):
        """
        :param run_id: identifier of the run the parent action belongs to
        :param parent_action_name: name of the parent action whose sub-actions are watched
        :param shared_queue: queue that receives every submitted or observed action
        :param client: state service client used to open the watch stream
        :param watch_backoff_interval_sec: pause, in seconds, before the watch is re-opened
        """
        self.name = self.mkname(run_name=run_id.name, parent_action_name=parent_action_name)
        self.parent_action_name = parent_action_name
        self._run_id = run_id
        self._client = client
        self._action_cache = ActionCache(parent_action_name)
        self._shared_queue = shared_queue
        self._running = False
        self._watch_task: asyncio.Task | None = None
        self._ready = asyncio.Event()
        self._watch_backoff_interval_sec = watch_backoff_interval_sec

    @classmethod
    def mkname(cls, *, run_name: str, parent_action_name: str) -> str:
        """Get the name of the informer"""
        return f"{run_name}.{parent_action_name}"

    def is_running(self) -> bool:
        """Check if informer is running"""
        return self._running

    async def _set_ready(self):
        """Set the informer as ready"""
        self._ready.set()

    async def wait_for_cache_sync(self, timeout: Optional[float] = None) -> bool:
        """
        Wait for the informer to be ready.

        :param timeout: float time to wait for; None waits indefinitely
        :return: True once the informer is ready, False when the wait timed out
        """
        try:
            await asyncio.wait_for(self._ready.wait(), timeout=timeout)
        except asyncio.TimeoutError:
            logger.error(f"Informer cache sync timed out, for {self.name}")
            return False
        # Report success explicitly so callers get the bool promised by the signature.
        return True

    @log
    async def submit(self, action: Action):
        """Add a new resource to watch"""
        node = await self._action_cache.submit(action)
        await self._shared_queue.put(node)

    @log
    async def remove(self, name: str):
        """Remove a resource from watching"""
        await self._action_cache.remove(name)

    async def get(self, name: str) -> Action | None:
        """Get a resource by name"""
        return await self._action_cache.get(name)

    async def has(self, name: str) -> bool:
        """Check if a resource exists"""
        return await self._action_cache.has(name)

    async def watch(self):
        """
        Consume the watch stream for the parent action until the informer is stopped.

        A sentinel control message marks the informer as ready. Every other response is
        recorded in the action cache and the resulting action is put on the shared queue.
        When the stream ends or fails, the watch is re-opened after the backoff interval.
        """
        while self._running:
            try:
                watcher = self._client.Watch(
                    state_service_pb2.WatchRequest(
                        parent_action_id=run_definition_pb2.ActionIdentifier(
                            name=self.parent_action_name,
                            run=self._run_id,
                        ),
                    ),
                )
                resp: state_service_pb2.WatchResponse
                async for resp in watcher:
                    if resp.control_message is not None and resp.control_message.sentinel:
                        logger.info(f"Received Sentinel, for run {self.name}")
                        await self._set_ready()
                        continue
                    node = await self._action_cache.observe_state(resp.action_update)
                    await self._shared_queue.put(node)
            except asyncio.CancelledError as e:
                logger.warning(f"Watch cancelled: {self.name} {e!s}")
                if not self._running:
                    # stop() cancelled this task: honour the cancellation instead of
                    # swallowing it and sleeping through one more backoff interval.
                    raise
            except asyncio.TimeoutError:
                logger.exception(f"Watch timeout: {self.name}")
            except grpc.aio.AioRpcError:
                logger.exception(f"RPC error: {self.name}")
            except Exception:
                logger.exception(f"Watch error: {self.name}")
            await asyncio.sleep(self._watch_backoff_interval_sec)

    @log
    async def start(self, timeout: Optional[float] = None):
        """
        Start the informer.

        Launches the watch as a background task and then waits, for at most ``timeout``
        seconds, until the informer is ready. Does nothing when it is already running.
        """
        if self._running:
            logger.warning("Informer already running")
            return
        self._running = True
        self._watch_task = asyncio.create_task(self.watch())
        await self.wait_for_cache_sync(timeout=timeout)

    async def count_started_pending_terminal_actions(self) -> Tuple[int, int, int]:
        """Return the (started, pending, terminal) counts of the cached actions"""
        return await self._action_cache.count_started_pending_terminal_actions()

    @log
    async def stop(self):
        """Stop the informer and cancel its watch task, if one was started"""
        self._running = False
        if self._watch_task:
            self._watch_task.cancel()
            self._watch_task = None
        logger.info("Stopped informer")
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
class InformerCache:
    """
    Cache for informers, used to store the state of all subactions for multiple parent_actions.

    Informers are keyed by ``Informer.mkname(run_name=..., parent_action_name=...)``.
    This is coroutine-safe: every access to the dictionary happens under an ``asyncio.Lock``.
    """

    def __init__(self):
        self._cache: Dict[str, Informer] = {}
        self._lock = asyncio.Lock()

    @log
    async def add(self, informer: Informer) -> bool:
        """
        Add a new informer to the cache.

        :return: False when an informer of the same name is already cached, True otherwise
        """
        async with self._lock:
            if informer.name in self._cache:
                return False
            self._cache[informer.name] = informer
            return True

    @log
    async def get(self, *, run_name: str, parent_action_name: str) -> Informer | None:
        """Get the informer for the given run and parent action, or None when absent"""
        key = Informer.mkname(run_name=run_name, parent_action_name=parent_action_name)
        async with self._lock:
            return self._cache.get(key)

    @log
    async def remove(self, *, run_name: str, parent_action_name: str) -> Informer | None:
        """Remove the informer for the given run and parent action and return it, or None when absent"""
        key = Informer.mkname(run_name=run_name, parent_action_name=parent_action_name)
        async with self._lock:
            return self._cache.pop(key, None)

    async def has(self, *, run_name: str, parent_action_name: str) -> bool:
        """Check if an informer exists in the cache"""
        key = Informer.mkname(run_name=run_name, parent_action_name=parent_action_name)
        async with self._lock:
            return key in self._cache

    async def count_started_pending_terminal_actions(self) -> AsyncIterator[Tuple[int, int, int]]:
        """
        Yield the (started, pending, terminal) counts of every cached informer.

        The informers are snapshotted under the lock and the lock is released before the
        first value is yielded. The lock is not reentrant, so holding it across ``yield``
        would block any consumer that touches the cache while iterating, and would keep it
        held when the consumer abandons the iteration early.
        """
        async with self._lock:
            informers = list(self._cache.values())
        for informer in informers:
            yield await informer.count_started_pending_terminal_actions()

    async def remove_and_stop_all(self):
        """Stop all informers and remove them from the cache"""
        async with self._lock:
            # popitem() drains the dictionary, so it is empty once the loop ends.
            while self._cache:
                _, informer = self._cache.popitem()
                await informer.stop()
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import AsyncIterator, Protocol
|
|
4
|
-
|
|
5
|
-
from union._protos.workflow import queue_service_pb2, state_service_pb2
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class StateService(Protocol):
    """
    Interface for the state store client, which stores the history of all subruns.
    """

    def Watch(self, req: state_service_pb2.WatchRequest) -> AsyncIterator[state_service_pb2.WatchResponse]:
        """
        Watch for subrun updates.

        Declared as a plain ``def`` on purpose: the informer calls ``Watch(...)`` without
        awaiting it and consumes the result with ``async for``. A bodiless ``async def``
        with this return annotation would instead describe a coroutine that resolves to
        an iterator.

        :param req: the watch request
        :return: an async iterator over the watch responses
        """
        ...
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class QueueService(Protocol):
    """
    Structural interface of the remote queue service client, the service that manages the queue of tasks.
    """

    async def EnqueueAction(
        self,
        req: queue_service_pb2.EnqueueActionRequest,
    ) -> queue_service_pb2.EnqueueActionResponse:
        """
        Put an action on the queue.

        :param req: the enqueue request
        :return: the service's response to the enqueue request
        """
        ...

    async def AbortQueuedAction(
        self,
        req: queue_service_pb2.AbortQueuedActionRequest,
    ) -> queue_service_pb2.AbortQueuedActionResponse:
        """
        Abort an action that was previously queued.

        :param req: the abort request
        :return: the service's response to the abort request
        """
        ...
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class ClientSet(Protocol):
    """
    Structural interface of the remote client set, which exposes the state service
    client and the queue service client.
    """

    @property
    def state_service(self) -> StateService:
        """The state service client"""
        ...

    @property
    def queue_service(self) -> QueueService:
        """The queue service client"""
        ...
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
from typing import List
|
|
3
|
-
|
|
4
|
-
from union._image import Image
|
|
5
|
-
from union._internal.imagebuild.docker_builder import DockerImageBuilder
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
async def build(images: List[Image]) -> List[str]:
    """
    Build all given images concurrently with a single ``DockerImageBuilder``.

    :param images: the images to build
    :return: the results of ``build_image``, in the same order as ``images``
    """
    docker_builder = DockerImageBuilder()
    pending = []
    for img in images:
        # One task per image so that the builds run concurrently.
        pending.append(asyncio.create_task(docker_builder.build_image(img)))
    results = await asyncio.gather(*pending)
    return list(results)
|