opengris-scaler 1.12.7__cp312-cp312-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of opengris-scaler might be problematic. Click here for more details.
- opengris_scaler-1.12.7.dist-info/METADATA +729 -0
- opengris_scaler-1.12.7.dist-info/RECORD +234 -0
- opengris_scaler-1.12.7.dist-info/WHEEL +5 -0
- opengris_scaler-1.12.7.dist-info/entry_points.txt +9 -0
- opengris_scaler-1.12.7.dist-info/licenses/LICENSE +201 -0
- opengris_scaler-1.12.7.dist-info/licenses/LICENSE.spdx +7 -0
- opengris_scaler-1.12.7.dist-info/licenses/NOTICE +8 -0
- opengris_scaler.libs/libcapnp-1-61c06778.1.0.so +0 -0
- opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
- opengris_scaler.libs/libkj-1-21b63b70.1.0.so +0 -0
- opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
- scaler/CMakeLists.txt +11 -0
- scaler/__init__.py +14 -0
- scaler/about.py +5 -0
- scaler/client/__init__.py +0 -0
- scaler/client/agent/__init__.py +0 -0
- scaler/client/agent/client_agent.py +210 -0
- scaler/client/agent/disconnect_manager.py +27 -0
- scaler/client/agent/future_manager.py +112 -0
- scaler/client/agent/heartbeat_manager.py +74 -0
- scaler/client/agent/mixins.py +89 -0
- scaler/client/agent/object_manager.py +98 -0
- scaler/client/agent/task_manager.py +64 -0
- scaler/client/client.py +635 -0
- scaler/client/future.py +252 -0
- scaler/client/object_buffer.py +129 -0
- scaler/client/object_reference.py +25 -0
- scaler/client/serializer/__init__.py +0 -0
- scaler/client/serializer/default.py +16 -0
- scaler/client/serializer/mixins.py +38 -0
- scaler/cluster/__init__.py +0 -0
- scaler/cluster/cluster.py +115 -0
- scaler/cluster/combo.py +148 -0
- scaler/cluster/object_storage_server.py +45 -0
- scaler/cluster/scheduler.py +83 -0
- scaler/config/__init__.py +0 -0
- scaler/config/defaults.py +87 -0
- scaler/config/loader.py +95 -0
- scaler/config/mixins.py +15 -0
- scaler/config/section/__init__.py +0 -0
- scaler/config/section/cluster.py +56 -0
- scaler/config/section/native_worker_adapter.py +44 -0
- scaler/config/section/object_storage_server.py +7 -0
- scaler/config/section/scheduler.py +53 -0
- scaler/config/section/symphony_worker_adapter.py +47 -0
- scaler/config/section/top.py +13 -0
- scaler/config/section/webui.py +16 -0
- scaler/config/types/__init__.py +0 -0
- scaler/config/types/object_storage_server.py +45 -0
- scaler/config/types/worker.py +57 -0
- scaler/config/types/zmq.py +79 -0
- scaler/entry_points/__init__.py +0 -0
- scaler/entry_points/cluster.py +133 -0
- scaler/entry_points/object_storage_server.py +41 -0
- scaler/entry_points/scheduler.py +135 -0
- scaler/entry_points/top.py +286 -0
- scaler/entry_points/webui.py +26 -0
- scaler/entry_points/worker_adapter_native.py +137 -0
- scaler/entry_points/worker_adapter_symphony.py +102 -0
- scaler/io/__init__.py +0 -0
- scaler/io/async_binder.py +85 -0
- scaler/io/async_connector.py +95 -0
- scaler/io/async_object_storage_connector.py +185 -0
- scaler/io/mixins.py +154 -0
- scaler/io/sync_connector.py +68 -0
- scaler/io/sync_object_storage_connector.py +185 -0
- scaler/io/sync_subscriber.py +83 -0
- scaler/io/utility.py +31 -0
- scaler/io/ymq/CMakeLists.txt +98 -0
- scaler/io/ymq/__init__.py +0 -0
- scaler/io/ymq/_ymq.pyi +96 -0
- scaler/io/ymq/_ymq.so +0 -0
- scaler/io/ymq/bytes.h +114 -0
- scaler/io/ymq/common.h +29 -0
- scaler/io/ymq/configuration.h +60 -0
- scaler/io/ymq/epoll_context.cpp +185 -0
- scaler/io/ymq/epoll_context.h +85 -0
- scaler/io/ymq/error.h +132 -0
- scaler/io/ymq/event_loop.h +55 -0
- scaler/io/ymq/event_loop_thread.cpp +64 -0
- scaler/io/ymq/event_loop_thread.h +46 -0
- scaler/io/ymq/event_manager.h +81 -0
- scaler/io/ymq/file_descriptor.h +203 -0
- scaler/io/ymq/interruptive_concurrent_queue.h +169 -0
- scaler/io/ymq/io_context.cpp +98 -0
- scaler/io/ymq/io_context.h +44 -0
- scaler/io/ymq/io_socket.cpp +299 -0
- scaler/io/ymq/io_socket.h +121 -0
- scaler/io/ymq/iocp_context.cpp +102 -0
- scaler/io/ymq/iocp_context.h +83 -0
- scaler/io/ymq/logging.h +163 -0
- scaler/io/ymq/message.h +15 -0
- scaler/io/ymq/message_connection.h +16 -0
- scaler/io/ymq/message_connection_tcp.cpp +672 -0
- scaler/io/ymq/message_connection_tcp.h +96 -0
- scaler/io/ymq/network_utils.h +179 -0
- scaler/io/ymq/pymod_ymq/bytes.h +113 -0
- scaler/io/ymq/pymod_ymq/exception.h +124 -0
- scaler/io/ymq/pymod_ymq/gil.h +15 -0
- scaler/io/ymq/pymod_ymq/io_context.h +166 -0
- scaler/io/ymq/pymod_ymq/io_socket.h +285 -0
- scaler/io/ymq/pymod_ymq/message.h +99 -0
- scaler/io/ymq/pymod_ymq/python.h +153 -0
- scaler/io/ymq/pymod_ymq/ymq.cpp +23 -0
- scaler/io/ymq/pymod_ymq/ymq.h +357 -0
- scaler/io/ymq/readme.md +114 -0
- scaler/io/ymq/simple_interface.cpp +80 -0
- scaler/io/ymq/simple_interface.h +24 -0
- scaler/io/ymq/tcp_client.cpp +367 -0
- scaler/io/ymq/tcp_client.h +75 -0
- scaler/io/ymq/tcp_operations.h +41 -0
- scaler/io/ymq/tcp_server.cpp +410 -0
- scaler/io/ymq/tcp_server.h +79 -0
- scaler/io/ymq/third_party/concurrentqueue.h +3747 -0
- scaler/io/ymq/timed_queue.h +272 -0
- scaler/io/ymq/timestamp.h +102 -0
- scaler/io/ymq/typedefs.h +20 -0
- scaler/io/ymq/utils.h +34 -0
- scaler/io/ymq/ymq.py +130 -0
- scaler/object_storage/CMakeLists.txt +50 -0
- scaler/object_storage/__init__.py +0 -0
- scaler/object_storage/constants.h +11 -0
- scaler/object_storage/defs.h +14 -0
- scaler/object_storage/io_helper.cpp +44 -0
- scaler/object_storage/io_helper.h +9 -0
- scaler/object_storage/message.cpp +56 -0
- scaler/object_storage/message.h +130 -0
- scaler/object_storage/object_manager.cpp +126 -0
- scaler/object_storage/object_manager.h +52 -0
- scaler/object_storage/object_storage_server.cpp +359 -0
- scaler/object_storage/object_storage_server.h +126 -0
- scaler/object_storage/object_storage_server.so +0 -0
- scaler/object_storage/pymod_object_storage_server.cpp +104 -0
- scaler/protocol/__init__.py +0 -0
- scaler/protocol/capnp/__init__.py +0 -0
- scaler/protocol/capnp/_python.py +6 -0
- scaler/protocol/capnp/common.capnp +63 -0
- scaler/protocol/capnp/message.capnp +216 -0
- scaler/protocol/capnp/object_storage.capnp +52 -0
- scaler/protocol/capnp/status.capnp +73 -0
- scaler/protocol/introduction.md +105 -0
- scaler/protocol/python/__init__.py +0 -0
- scaler/protocol/python/common.py +135 -0
- scaler/protocol/python/message.py +726 -0
- scaler/protocol/python/mixins.py +13 -0
- scaler/protocol/python/object_storage.py +118 -0
- scaler/protocol/python/status.py +279 -0
- scaler/protocol/worker.md +228 -0
- scaler/scheduler/__init__.py +0 -0
- scaler/scheduler/allocate_policy/__init__.py +0 -0
- scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
- scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
- scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
- scaler/scheduler/allocate_policy/mixins.py +55 -0
- scaler/scheduler/controllers/__init__.py +0 -0
- scaler/scheduler/controllers/balance_controller.py +65 -0
- scaler/scheduler/controllers/client_controller.py +131 -0
- scaler/scheduler/controllers/config_controller.py +31 -0
- scaler/scheduler/controllers/graph_controller.py +424 -0
- scaler/scheduler/controllers/information_controller.py +81 -0
- scaler/scheduler/controllers/mixins.py +201 -0
- scaler/scheduler/controllers/object_controller.py +147 -0
- scaler/scheduler/controllers/scaling_controller.py +86 -0
- scaler/scheduler/controllers/task_controller.py +373 -0
- scaler/scheduler/controllers/worker_controller.py +168 -0
- scaler/scheduler/object_usage/__init__.py +0 -0
- scaler/scheduler/object_usage/object_tracker.py +131 -0
- scaler/scheduler/scheduler.py +253 -0
- scaler/scheduler/task/__init__.py +0 -0
- scaler/scheduler/task/task_state_machine.py +92 -0
- scaler/scheduler/task/task_state_manager.py +61 -0
- scaler/ui/__init__.py +0 -0
- scaler/ui/constants.py +9 -0
- scaler/ui/live_display.py +118 -0
- scaler/ui/memory_window.py +146 -0
- scaler/ui/setting_page.py +47 -0
- scaler/ui/task_graph.py +370 -0
- scaler/ui/task_log.py +83 -0
- scaler/ui/utility.py +35 -0
- scaler/ui/webui.py +125 -0
- scaler/ui/worker_processors.py +85 -0
- scaler/utility/__init__.py +0 -0
- scaler/utility/debug.py +19 -0
- scaler/utility/event_list.py +63 -0
- scaler/utility/event_loop.py +58 -0
- scaler/utility/exceptions.py +42 -0
- scaler/utility/formatter.py +44 -0
- scaler/utility/graph/__init__.py +0 -0
- scaler/utility/graph/optimization.py +27 -0
- scaler/utility/graph/topological_sorter.py +11 -0
- scaler/utility/graph/topological_sorter_graphblas.py +174 -0
- scaler/utility/identifiers.py +105 -0
- scaler/utility/logging/__init__.py +0 -0
- scaler/utility/logging/decorators.py +25 -0
- scaler/utility/logging/scoped_logger.py +33 -0
- scaler/utility/logging/utility.py +183 -0
- scaler/utility/many_to_many_dict.py +123 -0
- scaler/utility/metadata/__init__.py +0 -0
- scaler/utility/metadata/profile_result.py +31 -0
- scaler/utility/metadata/task_flags.py +30 -0
- scaler/utility/mixins.py +13 -0
- scaler/utility/network_util.py +7 -0
- scaler/utility/one_to_many_dict.py +72 -0
- scaler/utility/queues/__init__.py +0 -0
- scaler/utility/queues/async_indexed_queue.py +37 -0
- scaler/utility/queues/async_priority_queue.py +70 -0
- scaler/utility/queues/async_sorted_priority_queue.py +45 -0
- scaler/utility/queues/indexed_queue.py +114 -0
- scaler/utility/serialization.py +9 -0
- scaler/version.txt +1 -0
- scaler/worker/__init__.py +0 -0
- scaler/worker/agent/__init__.py +0 -0
- scaler/worker/agent/heartbeat_manager.py +107 -0
- scaler/worker/agent/mixins.py +137 -0
- scaler/worker/agent/processor/__init__.py +0 -0
- scaler/worker/agent/processor/object_cache.py +107 -0
- scaler/worker/agent/processor/processor.py +279 -0
- scaler/worker/agent/processor/streaming_buffer.py +28 -0
- scaler/worker/agent/processor_holder.py +145 -0
- scaler/worker/agent/processor_manager.py +365 -0
- scaler/worker/agent/profiling_manager.py +109 -0
- scaler/worker/agent/task_manager.py +150 -0
- scaler/worker/agent/timeout_manager.py +19 -0
- scaler/worker/preload.py +84 -0
- scaler/worker/worker.py +264 -0
- scaler/worker_adapter/__init__.py +0 -0
- scaler/worker_adapter/native.py +154 -0
- scaler/worker_adapter/symphony/__init__.py +0 -0
- scaler/worker_adapter/symphony/callback.py +45 -0
- scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
- scaler/worker_adapter/symphony/message.py +24 -0
- scaler/worker_adapter/symphony/task_manager.py +288 -0
- scaler/worker_adapter/symphony/worker.py +205 -0
- scaler/worker_adapter/symphony/worker_adapter.py +142 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import Any, Optional, Set
|
|
3
|
+
|
|
4
|
+
from scaler.protocol.python.common import ObjectMetadata
|
|
5
|
+
from scaler.protocol.python.message import (
|
|
6
|
+
ClientDisconnect,
|
|
7
|
+
ClientHeartbeat,
|
|
8
|
+
DisconnectRequest,
|
|
9
|
+
GraphTask,
|
|
10
|
+
InformationRequest,
|
|
11
|
+
InformationSnapshot,
|
|
12
|
+
ObjectInstruction,
|
|
13
|
+
Task,
|
|
14
|
+
TaskCancel,
|
|
15
|
+
TaskCancelConfirm,
|
|
16
|
+
TaskResult,
|
|
17
|
+
WorkerHeartbeat,
|
|
18
|
+
)
|
|
19
|
+
from scaler.utility.identifiers import ClientID, ObjectID, TaskID, WorkerID
|
|
20
|
+
from scaler.utility.mixins import Reporter
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ConfigController(metaclass=abc.ABCMeta):
    """Abstract interface for a path-addressed configuration store.

    Concrete subclasses must implement both methods; this class cannot be
    instantiated directly.
    """

    @abc.abstractmethod
    def get_config(self, path: str) -> Any:
        """Return the configuration value addressed by ``path``."""
        raise NotImplementedError()

    @abc.abstractmethod
    def update_config(self, path: str, value: Any):
        """Set the configuration entry addressed by ``path`` to ``value``."""
        raise NotImplementedError()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ObjectController(Reporter):
    """Abstract interface of the controller that keeps track of objects and the clients using them."""

    @abc.abstractmethod
    async def on_object_instruction(self, source: bytes, request: ObjectInstruction):
        """Handle an ``ObjectInstruction`` message received from ``source``."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_add_object(
        self,
        client_id: ClientID,
        object_id: ObjectID,
        object_type: ObjectMetadata.ObjectContentType,
        object_name: bytes,
    ):
        """Register the object ``object_id`` (with its content type and name) on behalf of ``client_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_del_objects(self, client_id: ClientID, object_ids: Set[ObjectID]):
        """Process a deletion request from ``client_id`` for every ID in ``object_ids``."""
        raise NotImplementedError()

    @abc.abstractmethod
    def clean_client(self, client_id: ClientID):
        """Discard the object bookkeeping associated with ``client_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    def has_object(self, object_id: ObjectID) -> bool:
        """Return whether ``object_id`` is currently known to this controller."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_object_name(self, object_id: ObjectID) -> bytes:
        """Return the name recorded for ``object_id``."""
        raise NotImplementedError()
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class ClientController(Reporter):
    """Abstract interface of the controller that tracks clients and the tasks that belong to them."""

    @abc.abstractmethod
    def get_client_task_ids(self, client_id: ClientID) -> Set[TaskID]:
        """Return the set of task IDs associated with ``client_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    def has_client_id(self, client_id: ClientID) -> bool:
        """Return whether ``client_id`` is known to this controller."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_client_id(self, task_id: TaskID) -> Optional[ClientID]:
        """Return the client associated with ``task_id``, or ``None`` when there is none."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_task_begin(self, client_id: ClientID, task_id: TaskID):
        """Record that ``task_id`` has begun on behalf of ``client_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_task_finish(self, task_id: TaskID) -> bytes:
        """Record that ``task_id`` has finished.

        The meaning of the returned bytes is defined by the implementation.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_heartbeat(self, client_id: ClientID, info: ClientHeartbeat):
        """Handle a ``ClientHeartbeat`` message sent by ``client_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_client_disconnect(self, client_id: ClientID, request: ClientDisconnect):
        """Handle a ``ClientDisconnect`` request sent by ``client_id``."""
        raise NotImplementedError()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class GraphTaskController(Reporter):
    """Abstract interface of the controller that handles graph tasks and their sub-tasks."""

    @abc.abstractmethod
    async def on_graph_task(self, client_id: ClientID, graph_task: GraphTask):
        """Handle a ``GraphTask`` submitted by ``client_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_graph_task_cancel(self, graph_task_cancel: TaskCancel):
        """Handle a cancellation request that targets a graph task."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_graph_sub_task_cancel_confirm(self, task_cancel_confirm: TaskCancelConfirm):
        """Handle the cancellation confirmation of a graph sub-task."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_graph_sub_task_result(self, result: TaskResult) -> bool:
        """Handle the result of a graph sub-task.

        The meaning of the returned flag is defined by the implementation.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def is_graph_subtask(self, task_id: TaskID) -> bool:
        """Return whether ``task_id`` identifies a sub-task of a graph task."""
        raise NotImplementedError()
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class TaskController(Reporter):
    """Abstract interface of the controller that reacts to task and worker life-cycle events."""

    @abc.abstractmethod
    async def on_task_new(self, task: Task):
        """Handle a newly submitted ``task``."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_cancel(self, client_id: ClientID, task_cancel: TaskCancel):
        """Handle a ``TaskCancel`` request issued by ``client_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_balance_cancel(self, task_id: TaskID):
        """Handle a balance-driven cancellation of ``task_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_cancel_confirm(self, task_cancel_confirm: TaskCancelConfirm):
        """Handle a ``TaskCancelConfirm`` message."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_result(self, result: TaskResult):
        """Handle a ``TaskResult`` message."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_worker_connect(self, worker_id: WorkerID):
        """React to ``worker_id`` having connected."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_worker_disconnect(self, task_id: TaskID, worker_id: WorkerID):
        """React to ``worker_id`` having disconnected, for the given ``task_id``."""
        raise NotImplementedError()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class WorkerController(Reporter):
    """Abstract interface of the controller that manages workers and the tasks assigned to them."""

    @abc.abstractmethod
    def acquire_worker(self, task: Task) -> Optional[WorkerID]:
        """Pick a worker for ``task`` and return its ID, or ``None``.

        Acquiring a worker must be atomic, so this method must not be a coroutine. Otherwise there would be a window
        in which a worker has been selected but the task has not yet been sent to it, and the task could not be found
        in the worker state.
        """

        # TODO: this function should return things that expose 3 kinds of information:
        # TODO: 1. worker id as bytes if have capacity and able to assign to worker id
        # TODO: 2. capacity is full, and unable to add new task
        # TODO: 3. capacity is not full, but all the workers are busy right now, so tasks will be queued
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_cancel(self, task_cancel: TaskCancel) -> bytes:
        """Handle a ``TaskCancel`` request.

        The meaning of the returned bytes is defined by the implementation.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_done(self, task_id: TaskID):
        """React to ``task_id`` being done."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_heartbeat(self, worker_id: WorkerID, info: WorkerHeartbeat):
        """Handle a ``WorkerHeartbeat`` message sent by ``worker_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_client_shutdown(self, client_id: ClientID):
        """Handle a shutdown requested by ``client_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_disconnect(self, worker_id: WorkerID, request: DisconnectRequest):
        """Handle a ``DisconnectRequest`` sent by ``worker_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    def has_available_worker(self) -> bool:
        """Return whether a worker is currently available."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_worker_by_task_id(self, task_id: TaskID) -> WorkerID:
        """Return the ID of the worker associated with ``task_id``."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_worker_ids(self) -> Set[WorkerID]:
        """Return the set of known worker IDs."""
        raise NotImplementedError()
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class ScalingController(Reporter):
    """Abstract interface of the controller that reacts to information snapshots to make scaling decisions."""

    @abc.abstractmethod
    async def on_snapshot(self, snapshot: InformationSnapshot):
        """Handle a new ``InformationSnapshot``."""
        raise NotImplementedError()
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class InformationController(metaclass=abc.ABCMeta):
    """Abstract interface of the controller that serves ``InformationRequest`` messages."""

    @abc.abstractmethod
    async def on_request(self, request: InformationRequest):
        """Handle an ``InformationRequest``."""
        raise NotImplementedError()
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import logging
|
|
3
|
+
from asyncio import Queue
|
|
4
|
+
from typing import Optional, Set
|
|
5
|
+
|
|
6
|
+
from scaler.io.mixins import AsyncBinder, AsyncConnector, AsyncObjectStorageConnector
|
|
7
|
+
from scaler.protocol.python.common import ObjectMetadata
|
|
8
|
+
from scaler.protocol.python.message import ObjectInstruction
|
|
9
|
+
from scaler.protocol.python.status import ObjectManagerStatus
|
|
10
|
+
from scaler.scheduler.controllers.config_controller import VanillaConfigController
|
|
11
|
+
from scaler.scheduler.controllers.mixins import ClientController, ObjectController, WorkerController
|
|
12
|
+
from scaler.scheduler.object_usage.object_tracker import ObjectTracker, ObjectUsage
|
|
13
|
+
from scaler.utility.identifiers import ClientID, ObjectID
|
|
14
|
+
from scaler.utility.mixins import Looper, Reporter
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclasses.dataclass
class _ObjectCreation(ObjectUsage):
    """Record of a created object as stored in the object tracker, keyed by its object ID."""

    # NOTE: the field order is part of the positional constructor signature; do not reorder.
    object_id: ObjectID  # identifier of the object; also used as the tracker key
    object_creator: ClientID  # client on whose behalf the object was added
    object_type: ObjectMetadata.ObjectContentType  # content type reported in the object metadata
    object_name: bytes  # name of the object, used in log messages and name look-ups

    def get_object_key(self) -> ObjectID:
        """Return the key under which this record is tracked, i.e. the object ID."""
        key = self.object_id
        return key
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class VanillaObjectController(ObjectController, Looper, Reporter):
    """Tracks which clients use which objects and propagates deletions once an object is no longer used.

    Objects are registered in an ``ObjectTracker``. When the tracker reports an object as finished, its ID is queued;
    ``routine()`` then batches the queued IDs, sends a Delete instruction to every worker and removes the objects from
    object storage.
    """

    def __init__(self, config_controller: VanillaConfigController):
        """Create the controller; the I/O endpoints and sibling controllers are injected later via ``register()``."""
        self._config_controller = config_controller

        # Collaborators, unset until register() is called.
        self._binder: Optional[AsyncBinder] = None
        self._binder_monitor: Optional[AsyncConnector] = None
        self._connector_storage: Optional[AsyncObjectStorageConnector] = None

        self._client_manager: Optional[ClientController] = None
        self._worker_manager: Optional[WorkerController] = None

        # IDs of objects whose deletion still has to be propagated by routine().
        self._queue_deleted_object_ids: Queue[ObjectID] = Queue()

        # The tracker is given __finished_object_storage as its callback, which enqueues the object's ID.
        self._object_tracker: ObjectTracker[ClientID, ObjectID, _ObjectCreation] = ObjectTracker(
            "object_usage", self.__finished_object_storage
        )

    def register(
        self,
        binder: AsyncBinder,
        binder_monitor: AsyncConnector,
        connector_storage: AsyncObjectStorageConnector,
        client_manager: ClientController,
        worker_manager: WorkerController,
    ):
        """Inject the I/O endpoints and the sibling controllers this controller relies on."""
        self._binder, self._binder_monitor = binder, binder_monitor
        self._connector_storage = connector_storage
        self._client_manager, self._worker_manager = client_manager, worker_manager

    async def on_object_instruction(self, source: bytes, instruction: ObjectInstruction):
        """Dispatch a Create or Delete instruction; any other instruction type is logged as an error."""
        instruction_types = ObjectInstruction.ObjectInstructionType

        if instruction.instruction_type == instruction_types.Create:
            self.__on_object_create(source, instruction)
        elif instruction.instruction_type == instruction_types.Delete:
            self.on_del_objects(instruction.object_user, set(instruction.object_metadata.object_ids))
        else:
            logging.error(f"received unknown object instruction_type={instruction.instruction_type} from {source=}")

    def on_add_object(
        self,
        client_id: ClientID,
        object_id: ObjectID,
        object_type: ObjectMetadata.ObjectContentType,
        object_name: bytes,
    ):
        """Register a new object in the tracker, with ``client_id`` as its first user."""
        creation = _ObjectCreation(object_id, client_id, object_type, object_name)

        message = (
            f"add object cache "
            f"object_name={creation.object_name!r}, "
            f"object_type={creation.object_type}, "
            f"object_id={creation.object_id!r}"
        )
        logging.debug(message)

        tracker = self._object_tracker
        tracker.add_object(creation)
        tracker.add_blocks_for_one_object(creation.get_object_key(), {creation.object_creator})

    def on_del_objects(self, client_id: ClientID, object_ids: Set[ObjectID]):
        """Remove ``client_id``'s block from each of the given objects, one object at a time."""
        release = self._object_tracker.remove_one_block_for_objects
        for object_id in object_ids:
            release({object_id}, client_id)

    def clean_client(self, client_id: ClientID):
        """Remove every block held by ``client_id`` from the tracker."""
        self._object_tracker.remove_blocks({client_id})

    async def routine(self):
        """Run one iteration of the controller loop: propagate pending object deletions."""
        await self.__routine_send_objects_deletions()

    def has_object(self, object_id: ObjectID) -> bool:
        """Return whether the tracker currently knows ``object_id``."""
        return self._object_tracker.has_object(object_id)

    def get_object_name(self, object_id: ObjectID) -> bytes:
        """Return the tracked name of ``object_id``, or ``b"<Unknown>"`` when the object is not tracked."""
        if self.has_object(object_id):
            return self._object_tracker.get_object(object_id).object_name

        return b"<Unknown>"

    def get_status(self) -> ObjectManagerStatus:
        """Build a status message carrying the number of tracked objects."""
        object_count = self._object_tracker.object_count()
        return ObjectManagerStatus.new_msg(object_count)

    async def __routine_send_objects_deletions(self):
        """Wait for at least one queued deletion, drain the queue, then notify all workers and object storage."""
        pending = self._queue_deleted_object_ids

        # Block until there is something to delete, then drain whatever else is already queued so that
        # a single batched Delete instruction is sent per worker.
        deleted_object_ids = [await pending.get()]
        pending.task_done()

        while not pending.empty():
            deleted_object_ids.append(pending.get_nowait())
            pending.task_done()

        batched_ids = tuple(deleted_object_ids)
        for worker in self._worker_manager.get_worker_ids():
            delete_instruction = ObjectInstruction.new_msg(
                ObjectInstruction.ObjectInstructionType.Delete,
                # TODO: ideally object_user should be set to the owning client ID, but then we cannot batch these
                # Delete instructions.
                None,
                ObjectMetadata.new_msg(batched_ids),
            )
            await self._binder.send(worker, delete_instruction)

        for object_id in deleted_object_ids:
            await self._connector_storage.delete_object(object_id)

    def __on_object_create(self, source: bytes, instruction: ObjectInstruction):
        """Register every object listed in a Create instruction, provided the requesting client is known."""
        if not self._client_manager.has_client_id(instruction.object_user):
            logging.error(f"received object creation from {source!r} for unknown client {instruction.object_user!r}")
            return

        metadata = instruction.object_metadata
        entries = zip(metadata.object_ids, metadata.object_types, metadata.object_names)
        for object_id, object_type, object_name in entries:
            self.on_add_object(instruction.object_user, object_id, object_type, object_name)

    def __finished_object_storage(self, creation: _ObjectCreation):
        """Tracker callback: queue the object's ID so that ``routine()`` propagates its deletion."""
        logging.debug(f"del object cache object_name={creation.object_name!r}, object_id={creation.object_id!r}")
        self._queue_deleted_object_ids.put_nowait(creation.object_id)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, List
|
|
3
|
+
|
|
4
|
+
import aiohttp
|
|
5
|
+
from aiohttp import web
|
|
6
|
+
|
|
7
|
+
from scaler.protocol.python.message import InformationSnapshot
|
|
8
|
+
from scaler.protocol.python.status import ScalingManagerStatus
|
|
9
|
+
from scaler.scheduler.controllers.mixins import ScalingController
|
|
10
|
+
from scaler.utility.identifiers import WorkerID
|
|
11
|
+
|
|
12
|
+
# Identifier of a worker group, held as bytes (the adapter's string ID, encoded).
WorkerGroupID = bytes
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class NullScalingController(ScalingController):
    """Scaling controller that never scales: it reports no worker groups and ignores every snapshot."""

    def get_status(self):
        """Return a scaling status message with an empty worker-group mapping."""
        no_worker_groups = {}
        return ScalingManagerStatus.new_msg(worker_groups=no_worker_groups)

    async def on_snapshot(self, information_snapshot: InformationSnapshot):
        """Ignore the snapshot; this controller takes no scaling action."""
        return None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class VanillaScalingController(ScalingController):
    """Starts and shuts down worker groups through an adapter webhook, driven by the task-to-worker ratio.

    On every snapshot the ratio ``len(tasks) / len(workers)`` is computed:

    * above ``upper_task_ratio``, a new worker group is requested from the adapter;
    * below ``lower_task_ratio``, the managed worker group with the fewest queued tasks is shut down.

    Only worker groups started by this controller are tracked and eligible for shutdown.
    """

    def __init__(self, adapter_webhook_url: str, lower_task_ratio: float = 1, upper_task_ratio: float = 10):
        """
        :param adapter_webhook_url: URL the scaling requests are POSTed to.
        :param lower_task_ratio: scale down when the task-to-worker ratio falls below this value.
        :param upper_task_ratio: scale up when the task-to-worker ratio exceeds this value; must not be smaller than
            ``lower_task_ratio``.
        """
        self._adapter_webhook_url = adapter_webhook_url
        self._lower_task_ratio = lower_task_ratio
        self._upper_task_ratio = upper_task_ratio
        assert upper_task_ratio >= lower_task_ratio

        # Worker groups started by this controller, mapped to the IDs of their workers.
        self._worker_groups: Dict[WorkerGroupID, List[WorkerID]] = {}

    def get_status(self):
        """Return a scaling status message describing the worker groups managed by this controller."""
        return ScalingManagerStatus.new_msg(worker_groups=self._worker_groups)

    async def on_snapshot(self, information_snapshot: InformationSnapshot):
        """Decide from the snapshot whether to start a worker group, shut one down, or do nothing."""
        workers = information_snapshot.workers

        if not workers:
            # No worker at all: the ratio is undefined, so start a group only if there is work to do.
            if information_snapshot.tasks:
                await self._start_worker_group()
            return

        task_ratio = len(information_snapshot.tasks) / len(workers)
        if task_ratio > self._upper_task_ratio:
            await self._start_worker_group()
            return

        if task_ratio >= self._lower_task_ratio:
            return

        if not self._worker_groups:
            # The connected workers were not started by this controller, so there is no group to shut down.
            # Without this guard, min() below would raise ValueError on an empty mapping.
            return

        worker_group_task_counts = {
            worker_group_id: sum(
                # A worker of a managed group may be absent from the snapshot (not yet connected, or already
                # disconnected); it contributes no queued task instead of raising KeyError.
                workers[worker_id].queued_tasks
                for worker_id in worker_ids
                if worker_id in workers
            )
            for worker_group_id, worker_ids in self._worker_groups.items()
        }
        worker_group_id = min(worker_group_task_counts, key=worker_group_task_counts.get)
        await self._shutdown_worker_group(worker_group_id)

    async def _start_worker_group(self):
        """Ask the adapter for a new worker group and record it on success."""
        response, status = await self._make_request({"action": "start_worker_group"})
        if status == web.HTTPTooManyRequests.status_code:
            logging.warning("Capacity exceeded, cannot start new worker group.")
            return
        if status == web.HTTPInternalServerError.status_code:
            logging.error(f"Failed to start worker group: {response.get('error', 'Unknown error')}")
            return
        if not self._is_success_status(status):
            # Any other non-2xx answer carries no worker group description; reading it would raise KeyError.
            logging.error(f"Failed to start worker group: unexpected HTTP status {status}")
            return

        worker_group_id = response["worker_group_id"].encode()
        self._worker_groups[worker_group_id] = [WorkerID(worker_id.encode()) for worker_id in response["worker_ids"]]
        logging.info(f"Started worker group: {worker_group_id.decode()}")

    async def _shutdown_worker_group(self, worker_group_id: WorkerGroupID):
        """Ask the adapter to shut down ``worker_group_id`` and forget the group once the adapter confirms."""
        if worker_group_id not in self._worker_groups:
            logging.error(f"Worker group with ID {worker_group_id.decode()} does not exist.")
            return

        response, status = await self._make_request(
            {"action": "shutdown_worker_group", "worker_group_id": worker_group_id.decode()}
        )
        if status == web.HTTPNotFound.status_code:
            logging.error(f"Worker group with ID {worker_group_id.decode()} not found in adapter.")
            return
        if status == web.HTTPInternalServerError.status_code:
            logging.error(f"Failed to shutdown worker group: {response.get('error', 'Unknown error')}")
            return
        if not self._is_success_status(status):
            # Keep tracking the group: the adapter did not confirm the shutdown.
            logging.error(f"Failed to shutdown worker group: unexpected HTTP status {status}")
            return

        self._worker_groups.pop(worker_group_id)
        logging.info(f"Shutdown worker group: {worker_group_id.decode()}")

    @staticmethod
    def _is_success_status(status: int) -> bool:
        """Return whether ``status`` is a 2xx HTTP status code."""
        return 200 <= status < 300

    async def _make_request(self, payload):
        """POST ``payload`` as JSON to the adapter webhook and return ``(decoded JSON body, HTTP status)``."""
        async with aiohttp.ClientSession() as session:
            async with session.post(self._adapter_webhook_url, json=payload) as response:
                return await response.json(), response.status
|