opengris-scaler 1.12.37__cp38-cp38-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opengris_scaler-1.12.37.dist-info/METADATA +730 -0
- opengris_scaler-1.12.37.dist-info/RECORD +196 -0
- opengris_scaler-1.12.37.dist-info/WHEEL +5 -0
- opengris_scaler-1.12.37.dist-info/entry_points.txt +10 -0
- opengris_scaler-1.12.37.dist-info/licenses/LICENSE +201 -0
- opengris_scaler-1.12.37.dist-info/licenses/LICENSE.spdx +7 -0
- opengris_scaler-1.12.37.dist-info/licenses/NOTICE +8 -0
- opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
- opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
- opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
- opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
- scaler/__init__.py +14 -0
- scaler/about.py +5 -0
- scaler/client/__init__.py +0 -0
- scaler/client/agent/__init__.py +0 -0
- scaler/client/agent/client_agent.py +218 -0
- scaler/client/agent/disconnect_manager.py +27 -0
- scaler/client/agent/future_manager.py +112 -0
- scaler/client/agent/heartbeat_manager.py +74 -0
- scaler/client/agent/mixins.py +89 -0
- scaler/client/agent/object_manager.py +98 -0
- scaler/client/agent/task_manager.py +64 -0
- scaler/client/client.py +672 -0
- scaler/client/future.py +252 -0
- scaler/client/object_buffer.py +129 -0
- scaler/client/object_reference.py +25 -0
- scaler/client/serializer/__init__.py +0 -0
- scaler/client/serializer/default.py +16 -0
- scaler/client/serializer/mixins.py +38 -0
- scaler/cluster/__init__.py +0 -0
- scaler/cluster/cluster.py +95 -0
- scaler/cluster/combo.py +157 -0
- scaler/cluster/object_storage_server.py +45 -0
- scaler/cluster/scheduler.py +86 -0
- scaler/config/__init__.py +0 -0
- scaler/config/common/__init__.py +0 -0
- scaler/config/common/logging.py +41 -0
- scaler/config/common/web.py +18 -0
- scaler/config/common/worker.py +65 -0
- scaler/config/common/worker_adapter.py +28 -0
- scaler/config/config_class.py +317 -0
- scaler/config/defaults.py +94 -0
- scaler/config/mixins.py +20 -0
- scaler/config/section/__init__.py +0 -0
- scaler/config/section/cluster.py +66 -0
- scaler/config/section/ecs_worker_adapter.py +78 -0
- scaler/config/section/native_worker_adapter.py +30 -0
- scaler/config/section/object_storage_server.py +13 -0
- scaler/config/section/scheduler.py +126 -0
- scaler/config/section/symphony_worker_adapter.py +35 -0
- scaler/config/section/top.py +16 -0
- scaler/config/section/webui.py +16 -0
- scaler/config/types/__init__.py +0 -0
- scaler/config/types/network_backend.py +12 -0
- scaler/config/types/object_storage_server.py +45 -0
- scaler/config/types/worker.py +67 -0
- scaler/config/types/zmq.py +83 -0
- scaler/entry_points/__init__.py +0 -0
- scaler/entry_points/cluster.py +10 -0
- scaler/entry_points/object_storage_server.py +26 -0
- scaler/entry_points/scheduler.py +51 -0
- scaler/entry_points/top.py +272 -0
- scaler/entry_points/webui.py +6 -0
- scaler/entry_points/worker_adapter_ecs.py +22 -0
- scaler/entry_points/worker_adapter_native.py +31 -0
- scaler/entry_points/worker_adapter_symphony.py +26 -0
- scaler/io/__init__.py +0 -0
- scaler/io/async_binder.py +89 -0
- scaler/io/async_connector.py +95 -0
- scaler/io/async_object_storage_connector.py +225 -0
- scaler/io/mixins.py +154 -0
- scaler/io/sync_connector.py +68 -0
- scaler/io/sync_object_storage_connector.py +249 -0
- scaler/io/sync_subscriber.py +83 -0
- scaler/io/utility.py +80 -0
- scaler/io/ymq/__init__.py +0 -0
- scaler/io/ymq/_ymq.pyi +95 -0
- scaler/io/ymq/_ymq.so +0 -0
- scaler/io/ymq/ymq.py +138 -0
- scaler/io/ymq_async_object_storage_connector.py +184 -0
- scaler/io/ymq_sync_object_storage_connector.py +184 -0
- scaler/object_storage/__init__.py +0 -0
- scaler/object_storage/object_storage_server.so +0 -0
- scaler/protocol/__init__.py +0 -0
- scaler/protocol/capnp/__init__.py +0 -0
- scaler/protocol/capnp/_python.py +6 -0
- scaler/protocol/capnp/common.capnp +68 -0
- scaler/protocol/capnp/message.capnp +218 -0
- scaler/protocol/capnp/object_storage.capnp +57 -0
- scaler/protocol/capnp/status.capnp +73 -0
- scaler/protocol/introduction.md +105 -0
- scaler/protocol/python/__init__.py +0 -0
- scaler/protocol/python/common.py +140 -0
- scaler/protocol/python/message.py +751 -0
- scaler/protocol/python/mixins.py +13 -0
- scaler/protocol/python/object_storage.py +118 -0
- scaler/protocol/python/status.py +279 -0
- scaler/protocol/worker.md +228 -0
- scaler/scheduler/__init__.py +0 -0
- scaler/scheduler/allocate_policy/__init__.py +0 -0
- scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
- scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
- scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
- scaler/scheduler/allocate_policy/mixins.py +55 -0
- scaler/scheduler/controllers/__init__.py +0 -0
- scaler/scheduler/controllers/balance_controller.py +65 -0
- scaler/scheduler/controllers/client_controller.py +131 -0
- scaler/scheduler/controllers/config_controller.py +31 -0
- scaler/scheduler/controllers/graph_controller.py +424 -0
- scaler/scheduler/controllers/information_controller.py +81 -0
- scaler/scheduler/controllers/mixins.py +194 -0
- scaler/scheduler/controllers/object_controller.py +147 -0
- scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
- scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
- scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
- scaler/scheduler/controllers/scaling_policies/null.py +14 -0
- scaler/scheduler/controllers/scaling_policies/types.py +9 -0
- scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
- scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
- scaler/scheduler/controllers/task_controller.py +376 -0
- scaler/scheduler/controllers/worker_controller.py +169 -0
- scaler/scheduler/object_usage/__init__.py +0 -0
- scaler/scheduler/object_usage/object_tracker.py +131 -0
- scaler/scheduler/scheduler.py +251 -0
- scaler/scheduler/task/__init__.py +0 -0
- scaler/scheduler/task/task_state_machine.py +92 -0
- scaler/scheduler/task/task_state_manager.py +61 -0
- scaler/ui/__init__.py +0 -0
- scaler/ui/common/__init__.py +0 -0
- scaler/ui/common/constants.py +9 -0
- scaler/ui/common/live_display.py +147 -0
- scaler/ui/common/memory_window.py +146 -0
- scaler/ui/common/setting_page.py +40 -0
- scaler/ui/common/task_graph.py +840 -0
- scaler/ui/common/task_log.py +111 -0
- scaler/ui/common/utility.py +66 -0
- scaler/ui/common/webui.py +80 -0
- scaler/ui/common/worker_processors.py +104 -0
- scaler/ui/v1.py +76 -0
- scaler/ui/v2.py +102 -0
- scaler/ui/webui.py +21 -0
- scaler/utility/__init__.py +0 -0
- scaler/utility/debug.py +19 -0
- scaler/utility/event_list.py +63 -0
- scaler/utility/event_loop.py +58 -0
- scaler/utility/exceptions.py +42 -0
- scaler/utility/formatter.py +44 -0
- scaler/utility/graph/__init__.py +0 -0
- scaler/utility/graph/optimization.py +27 -0
- scaler/utility/graph/topological_sorter.py +11 -0
- scaler/utility/graph/topological_sorter_graphblas.py +174 -0
- scaler/utility/identifiers.py +107 -0
- scaler/utility/logging/__init__.py +0 -0
- scaler/utility/logging/decorators.py +25 -0
- scaler/utility/logging/scoped_logger.py +33 -0
- scaler/utility/logging/utility.py +183 -0
- scaler/utility/many_to_many_dict.py +123 -0
- scaler/utility/metadata/__init__.py +0 -0
- scaler/utility/metadata/profile_result.py +31 -0
- scaler/utility/metadata/task_flags.py +30 -0
- scaler/utility/mixins.py +13 -0
- scaler/utility/network_util.py +7 -0
- scaler/utility/one_to_many_dict.py +72 -0
- scaler/utility/queues/__init__.py +0 -0
- scaler/utility/queues/async_indexed_queue.py +37 -0
- scaler/utility/queues/async_priority_queue.py +70 -0
- scaler/utility/queues/async_sorted_priority_queue.py +45 -0
- scaler/utility/queues/indexed_queue.py +114 -0
- scaler/utility/serialization.py +9 -0
- scaler/version.txt +1 -0
- scaler/worker/__init__.py +0 -0
- scaler/worker/agent/__init__.py +0 -0
- scaler/worker/agent/heartbeat_manager.py +110 -0
- scaler/worker/agent/mixins.py +137 -0
- scaler/worker/agent/processor/__init__.py +0 -0
- scaler/worker/agent/processor/object_cache.py +107 -0
- scaler/worker/agent/processor/processor.py +285 -0
- scaler/worker/agent/processor/streaming_buffer.py +28 -0
- scaler/worker/agent/processor_holder.py +147 -0
- scaler/worker/agent/processor_manager.py +369 -0
- scaler/worker/agent/profiling_manager.py +109 -0
- scaler/worker/agent/task_manager.py +150 -0
- scaler/worker/agent/timeout_manager.py +19 -0
- scaler/worker/preload.py +84 -0
- scaler/worker/worker.py +265 -0
- scaler/worker_adapter/__init__.py +0 -0
- scaler/worker_adapter/common.py +26 -0
- scaler/worker_adapter/ecs.py +241 -0
- scaler/worker_adapter/native.py +138 -0
- scaler/worker_adapter/symphony/__init__.py +0 -0
- scaler/worker_adapter/symphony/callback.py +45 -0
- scaler/worker_adapter/symphony/heartbeat_manager.py +82 -0
- scaler/worker_adapter/symphony/message.py +24 -0
- scaler/worker_adapter/symphony/task_manager.py +289 -0
- scaler/worker_adapter/symphony/worker.py +204 -0
- scaler/worker_adapter/symphony/worker_adapter.py +123 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
import multiprocessing
|
|
4
|
+
import signal
|
|
5
|
+
from collections import deque
|
|
6
|
+
from typing import Dict, Optional
|
|
7
|
+
|
|
8
|
+
import zmq
|
|
9
|
+
|
|
10
|
+
from scaler.config.types.object_storage_server import ObjectStorageAddressConfig
|
|
11
|
+
from scaler.config.types.zmq import ZMQConfig
|
|
12
|
+
from scaler.io.async_connector import ZMQAsyncConnector
|
|
13
|
+
from scaler.io.mixins import AsyncConnector, AsyncObjectStorageConnector
|
|
14
|
+
from scaler.io.utility import create_async_object_storage_connector
|
|
15
|
+
from scaler.protocol.python.message import (
|
|
16
|
+
ClientDisconnect,
|
|
17
|
+
DisconnectRequest,
|
|
18
|
+
ObjectInstruction,
|
|
19
|
+
Task,
|
|
20
|
+
TaskCancel,
|
|
21
|
+
WorkerHeartbeatEcho,
|
|
22
|
+
)
|
|
23
|
+
from scaler.protocol.python.mixins import Message
|
|
24
|
+
from scaler.utility.event_loop import create_async_loop_routine, register_event_loop
|
|
25
|
+
from scaler.utility.exceptions import ClientShutdownException
|
|
26
|
+
from scaler.utility.identifiers import WorkerID
|
|
27
|
+
from scaler.utility.logging.utility import setup_logger
|
|
28
|
+
from scaler.worker.agent.timeout_manager import VanillaTimeoutManager
|
|
29
|
+
from scaler.worker_adapter.symphony.heartbeat_manager import SymphonyHeartbeatManager
|
|
30
|
+
from scaler.worker_adapter.symphony.task_manager import SymphonyTaskManager
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SymphonyWorker(multiprocessing.get_context("spawn").Process):  # type: ignore
    """
    SymphonyWorker is an implementation of a worker that can handle multiple tasks concurrently.
    Most of the task execution logic is handled by SymphonyTaskManager.

    The worker is a process created from the "spawn" multiprocessing context. Its constructor only records
    configuration; connectors, managers and the event loop are built in ``run()`` (i.e. when the process body
    executes).
    """

    def __init__(
        self,
        name: str,
        address: ZMQConfig,
        object_storage_address: Optional[ObjectStorageAddressConfig],
        service_name: str,
        capabilities: Dict[str, int],
        base_concurrency: int,
        heartbeat_interval_seconds: int,
        death_timeout_seconds: int,
        task_queue_size: int,
        io_threads: int,
        event_loop: str,
    ):
        """
        Record the worker configuration.

        :param name: worker name; also used to derive the worker identity
        :param address: address the external DEALER connector connects to
        :param object_storage_address: optional object storage address; when given, the storage connector
            connects to it as soon as the loops start
        :param service_name: forwarded to SymphonyTaskManager
        :param capabilities: forwarded to SymphonyHeartbeatManager
        :param base_concurrency: forwarded to SymphonyTaskManager
        :param heartbeat_interval_seconds: period of the heartbeat routine
        :param death_timeout_seconds: forwarded to VanillaTimeoutManager
        :param task_queue_size: forwarded to SymphonyHeartbeatManager
        :param io_threads: stored on the instance
        :param event_loop: event loop name passed to register_event_loop
        """
        multiprocessing.Process.__init__(self, name="Agent")

        # replaces the `_name` set by the base initializer above with the worker name
        self._name = name
        # stored as `_ident` because `_identity` is internal to multiprocessing.Process
        self._ident = WorkerID.generate_worker_id(name)

        # connection settings
        self._address = address
        self._object_storage_address = object_storage_address
        self._event_loop = event_loop
        # NOTE(review): io_threads is stored but not read anywhere in this class -- confirm whether it was
        # meant to be passed to zmq.asyncio.Context()
        self._io_threads = io_threads

        # task execution settings
        self._service_name = service_name
        self._base_concurrency = base_concurrency
        self._capabilities = capabilities

        # liveness settings
        self._heartbeat_interval_seconds = heartbeat_interval_seconds
        self._death_timeout_seconds = death_timeout_seconds
        self._task_queue_size = task_queue_size

        # runtime objects, created in __initialize()
        self._context: Optional[zmq.asyncio.Context] = None
        self._connector_external: Optional[AsyncConnector] = None
        self._connector_storage: Optional[AsyncObjectStorageConnector] = None
        self._task_manager: Optional[SymphonyTaskManager] = None
        self._heartbeat_manager: Optional[SymphonyHeartbeatManager] = None

        # Sometimes the first message received is not a heartbeat echo, so we need to backoff processing other
        # tasks until we receive the first heartbeat echo.
        self._heartbeat_received: bool = False
        self._backoff_message_queue: deque = deque()

    @property
    def identity(self) -> WorkerID:
        """Worker identity generated from the worker name."""
        return self._ident

    def run(self) -> None:
        """Process entry point: build all runtime objects, then run the event loop until the main task ends."""
        self.__initialize()
        self.__run_forever()

    def __initialize(self):
        """Create connectors and managers, wire them together and schedule the main task on the event loop."""
        setup_logger()
        register_event_loop(self._event_loop)

        # external connector: DEALER socket connecting to the configured address
        self._context = zmq.asyncio.Context()
        self._connector_external = ZMQAsyncConnector(
            context=self._context,
            name=self.name,
            socket_type=zmq.DEALER,
            address=self._address,
            bind_or_connect="connect",
            callback=self.__on_receive_external,
            identity=self._ident,
        )

        self._connector_storage = create_async_object_storage_connector()

        # managers
        self._heartbeat_manager = SymphonyHeartbeatManager(
            object_storage_address=self._object_storage_address,
            capabilities=self._capabilities,
            task_queue_size=self._task_queue_size,
        )
        self._task_manager = SymphonyTaskManager(
            base_concurrency=self._base_concurrency, service_name=self._service_name
        )
        self._timeout_manager = VanillaTimeoutManager(death_timeout_seconds=self._death_timeout_seconds)

        # let the managers know about the connectors and about each other
        self._heartbeat_manager.register(
            connector_external=self._connector_external,
            connector_storage=self._connector_storage,
            worker_task_manager=self._task_manager,
            timeout_manager=self._timeout_manager,
        )
        self._task_manager.register(
            connector_external=self._connector_external,
            connector_storage=self._connector_storage,
            heartbeat_manager=self._heartbeat_manager,
        )

        self._loop = asyncio.get_event_loop()
        self.__register_signal()
        self._task = self._loop.create_task(self.__get_loops())

    async def __on_receive_external(self, message: Message):
        """
        Dispatch one message received on the external connector.

        Until the first heartbeat echo arrives, every other message is parked in the backoff queue. The first
        (and any later) heartbeat echo is handed to the heartbeat manager, after which all parked messages are
        replayed in arrival order.

        :raises ClientShutdownException: on a ClientDisconnect message of type Shutdown
        :raises TypeError: on a message type this worker does not handle
        """
        is_heartbeat_echo = isinstance(message, WorkerHeartbeatEcho)

        if not is_heartbeat_echo and not self._heartbeat_received:
            self._backoff_message_queue.append(message)
            return

        if is_heartbeat_echo:
            await self._heartbeat_manager.on_heartbeat_echo(message)
            self._heartbeat_received = True

            # the flag is set now, so replayed messages go straight through the dispatch below
            pending = self._backoff_message_queue
            while pending:
                await self.__on_receive_external(pending.popleft())

            return

        if isinstance(message, Task):
            await self._task_manager.on_task_new(message)
        elif isinstance(message, TaskCancel):
            await self._task_manager.on_cancel_task(message)
        elif isinstance(message, ObjectInstruction):
            await self._task_manager.on_object_instruction(message)
        elif isinstance(message, ClientDisconnect):
            if message.disconnect_type == ClientDisconnect.DisconnectType.Shutdown:
                raise ClientShutdownException("received client shutdown, quitting")

            logging.error(f"Worker received invalid ClientDisconnect type, ignoring {message=}")
        else:
            raise TypeError(f"Unknown {message=}")

    async def __get_loops(self):
        """
        Main task: run every periodic routine until one of them stops, then disconnect.

        Cancellation is swallowed silently, ClientShutdownException / TimeoutError are logged at info level and
        any other exception is logged with its traceback. In all of those cases a DisconnectRequest is sent and
        the external connector is destroyed afterwards.
        """
        storage_address = self._object_storage_address
        if storage_address is not None:
            # With a manually set storage address, immediately connect to the object storage server.
            await self._connector_storage.connect(storage_address.host, storage_address.port)

        try:
            routines = [
                create_async_loop_routine(self._connector_external.routine, 0),
                create_async_loop_routine(self._connector_storage.routine, 0),
                create_async_loop_routine(self._heartbeat_manager.routine, self._heartbeat_interval_seconds),
                create_async_loop_routine(self._timeout_manager.routine, 1),
                create_async_loop_routine(self._task_manager.process_task, 0),
                create_async_loop_routine(self._task_manager.resolve_tasks, 0),
            ]
            await asyncio.gather(*routines)
        except asyncio.CancelledError:
            # raised when __destroy() cancels the main task
            pass
        except (ClientShutdownException, TimeoutError) as e:
            logging.info(f"{self.identity!r}: {str(e)}")
        except Exception as e:
            logging.exception(f"{self.identity!r}: failed with unhandled exception:\n{e}")

        await self._connector_external.send(DisconnectRequest.new_msg(self.identity))

        self._connector_external.destroy()
        logging.info(f"{self.identity!r}: quit")

    def __run_forever(self):
        """Block until the main task created in __initialize() completes."""
        self._loop.run_until_complete(self._task)

    def __register_signal(self):
        """Install __destroy as the SIGINT handler on the event loop."""
        self._loop.add_signal_handler(signal.SIGINT, self.__destroy)

    def __destroy(self):
        """Cancel the main task; __get_loops() then performs the disconnect."""
        self._task.cancel()
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import signal
|
|
3
|
+
import uuid
|
|
4
|
+
from typing import Dict
|
|
5
|
+
|
|
6
|
+
from aiohttp import web
|
|
7
|
+
from aiohttp.web_request import Request
|
|
8
|
+
|
|
9
|
+
from scaler.config.section.symphony_worker_adapter import SymphonyWorkerConfig
|
|
10
|
+
from scaler.utility.identifiers import WorkerID
|
|
11
|
+
from scaler.worker_adapter.common import CapacityExceededError, WorkerGroupID, WorkerGroupNotFoundError
|
|
12
|
+
from scaler.worker_adapter.symphony.worker import SymphonyWorker
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SymphonyWorkerAdapter:
    """
    Adapter that starts and stops a SymphonyWorker process on request.

    Requests arrive as JSON objects POSTed to "/" (see ``create_app``) and carry an ``"action"`` key with one of
    ``"get_worker_adapter_info"``, ``"start_worker_group"`` or ``"shutdown_worker_group"``.
    """

    def __init__(self, config: SymphonyWorkerConfig):
        """
        Copy the settings needed to spawn a SymphonyWorker out of ``config``.

        :param config: Symphony worker adapter configuration
        """
        self._address = config.worker_adapter_config.scheduler_address
        self._object_storage_address = config.worker_adapter_config.object_storage_address
        self._service_name = config.service_name
        self._base_concurrency = config.worker_adapter_config.max_workers
        self._capabilities = config.worker_config.per_worker_capabilities.capabilities
        self._io_threads = config.worker_io_threads
        self._task_queue_size = config.worker_config.per_worker_task_queue_size
        self._heartbeat_interval_seconds = config.worker_config.heartbeat_interval_seconds
        self._death_timeout_seconds = config.worker_config.death_timeout_seconds
        self._event_loop = config.event_loop
        self._logging_paths = config.logging_config.paths
        self._logging_level = config.logging_config.level
        self._logging_config_file = config.logging_config.config_file

        # Although a worker group can contain multiple workers, in this Symphony adapter implementation,
        # there will be only one worker group which contains one Symphony worker.
        self._worker_groups: Dict[WorkerGroupID, Dict[WorkerID, SymphonyWorker]] = {}

    async def start_worker_group(self) -> WorkerGroupID:
        """
        Spawn the Symphony worker process and register it under a freshly generated worker group ID.

        :return: ID of the new worker group
        :raises CapacityExceededError: if a worker group is already registered
        """
        if self._worker_groups:
            raise CapacityExceededError("Symphony worker already started")

        worker = SymphonyWorker(
            name=f"SYM|{uuid.uuid4().hex}",
            address=self._address,
            object_storage_address=self._object_storage_address,
            service_name=self._service_name,
            base_concurrency=self._base_concurrency,
            capabilities=self._capabilities,
            io_threads=self._io_threads,
            task_queue_size=self._task_queue_size,
            heartbeat_interval_seconds=self._heartbeat_interval_seconds,
            death_timeout_seconds=self._death_timeout_seconds,
            event_loop=self._event_loop,
        )

        worker.start()
        worker_group_id = f"symphony-{uuid.uuid4().hex}".encode()
        self._worker_groups[worker_group_id] = {worker.identity: worker}
        return worker_group_id

    async def shutdown_worker_group(self, worker_group_id: WorkerGroupID):
        """
        Send SIGINT to every worker of the group, wait for each to exit, then forget the group.

        :param worker_group_id: ID previously returned by ``start_worker_group``
        :raises WorkerGroupNotFoundError: if the ID is not registered
        """
        if worker_group_id not in self._worker_groups:
            raise WorkerGroupNotFoundError(f"Worker group with ID {worker_group_id.decode()} does not exist.")

        for worker in self._worker_groups[worker_group_id].values():
            try:
                os.kill(worker.pid, signal.SIGINT)
            except ProcessLookupError:
                # The process is already gone. Without this guard the error would abort the shutdown before
                # the group is removed below, leaving the adapter unable to start or stop any group.
                pass

            # NOTE(review): join() is a blocking call made from a coroutine, so the event loop is stalled
            # until the worker exits -- confirm this is acceptable for the webhook server.
            worker.join()

        self._worker_groups.pop(worker_group_id)

    async def webhook_handler(self, request: Request):
        """
        Handle one webhook request and answer with a JSON response.

        Malformed requests (invalid JSON, a body that is not a JSON object, a missing or unknown action, a
        missing or non-string ``worker_group_id``) are answered with 400 rather than propagating an exception.

        :param request: incoming aiohttp request whose body is a JSON object
        :return: JSON response; 200 on success, 400/404/429/500 with an ``"error"`` message otherwise
        """
        bad_request = web.HTTPBadRequest.status_code

        try:
            request_json = await request.json()
        except ValueError:
            # covers json.JSONDecodeError and UnicodeDecodeError, both subclasses of ValueError
            return web.json_response({"error": "Invalid JSON body"}, status=bad_request)

        if not isinstance(request_json, dict):
            return web.json_response({"error": "Request body must be a JSON object"}, status=bad_request)

        if "action" not in request_json:
            return web.json_response({"error": "No action specified"}, status=bad_request)

        action = request_json["action"]

        if action == "get_worker_adapter_info":
            return web.json_response(
                {"max_worker_groups": 1, "workers_per_group": 1, "base_capabilities": self._capabilities},
                status=web.HTTPOk.status_code,
            )

        if action == "start_worker_group":
            try:
                worker_group_id = await self.start_worker_group()
            except CapacityExceededError as e:
                return web.json_response({"error": str(e)}, status=web.HTTPTooManyRequests.status_code)
            except Exception as e:
                return web.json_response({"error": str(e)}, status=web.HTTPInternalServerError.status_code)

            return web.json_response(
                {
                    "status": "Worker group started",
                    "worker_group_id": worker_group_id.decode(),
                    "worker_ids": [worker_id.decode() for worker_id in self._worker_groups[worker_group_id]],
                },
                status=web.HTTPOk.status_code,
            )

        if action == "shutdown_worker_group":
            if "worker_group_id" not in request_json:
                return web.json_response({"error": "No worker_group_id specified"}, status=bad_request)

            raw_worker_group_id = request_json["worker_group_id"]
            if not isinstance(raw_worker_group_id, str):
                # .encode() below only exists on str; reject numbers, lists, null, ...
                return web.json_response({"error": "worker_group_id must be a string"}, status=bad_request)

            worker_group_id = raw_worker_group_id.encode()
            try:
                await self.shutdown_worker_group(worker_group_id)
            except WorkerGroupNotFoundError as e:
                return web.json_response({"error": str(e)}, status=web.HTTPNotFound.status_code)
            except Exception as e:
                return web.json_response({"error": str(e)}, status=web.HTTPInternalServerError.status_code)

            return web.json_response({"status": "Worker group shutdown"}, status=web.HTTPOk.status_code)

        return web.json_response({"error": "Unknown action"}, status=bad_request)

    def create_app(self):
        """Build the aiohttp application that routes POST "/" to ``webhook_handler``."""
        app = web.Application()
        app.router.add_post("/", self.webhook_handler)
        return app
|