opengris-scaler 1.12.37__cp38-cp38-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opengris_scaler-1.12.37.dist-info/METADATA +730 -0
- opengris_scaler-1.12.37.dist-info/RECORD +196 -0
- opengris_scaler-1.12.37.dist-info/WHEEL +5 -0
- opengris_scaler-1.12.37.dist-info/entry_points.txt +10 -0
- opengris_scaler-1.12.37.dist-info/licenses/LICENSE +201 -0
- opengris_scaler-1.12.37.dist-info/licenses/LICENSE.spdx +7 -0
- opengris_scaler-1.12.37.dist-info/licenses/NOTICE +8 -0
- opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
- opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
- opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
- opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
- scaler/__init__.py +14 -0
- scaler/about.py +5 -0
- scaler/client/__init__.py +0 -0
- scaler/client/agent/__init__.py +0 -0
- scaler/client/agent/client_agent.py +218 -0
- scaler/client/agent/disconnect_manager.py +27 -0
- scaler/client/agent/future_manager.py +112 -0
- scaler/client/agent/heartbeat_manager.py +74 -0
- scaler/client/agent/mixins.py +89 -0
- scaler/client/agent/object_manager.py +98 -0
- scaler/client/agent/task_manager.py +64 -0
- scaler/client/client.py +672 -0
- scaler/client/future.py +252 -0
- scaler/client/object_buffer.py +129 -0
- scaler/client/object_reference.py +25 -0
- scaler/client/serializer/__init__.py +0 -0
- scaler/client/serializer/default.py +16 -0
- scaler/client/serializer/mixins.py +38 -0
- scaler/cluster/__init__.py +0 -0
- scaler/cluster/cluster.py +95 -0
- scaler/cluster/combo.py +157 -0
- scaler/cluster/object_storage_server.py +45 -0
- scaler/cluster/scheduler.py +86 -0
- scaler/config/__init__.py +0 -0
- scaler/config/common/__init__.py +0 -0
- scaler/config/common/logging.py +41 -0
- scaler/config/common/web.py +18 -0
- scaler/config/common/worker.py +65 -0
- scaler/config/common/worker_adapter.py +28 -0
- scaler/config/config_class.py +317 -0
- scaler/config/defaults.py +94 -0
- scaler/config/mixins.py +20 -0
- scaler/config/section/__init__.py +0 -0
- scaler/config/section/cluster.py +66 -0
- scaler/config/section/ecs_worker_adapter.py +78 -0
- scaler/config/section/native_worker_adapter.py +30 -0
- scaler/config/section/object_storage_server.py +13 -0
- scaler/config/section/scheduler.py +126 -0
- scaler/config/section/symphony_worker_adapter.py +35 -0
- scaler/config/section/top.py +16 -0
- scaler/config/section/webui.py +16 -0
- scaler/config/types/__init__.py +0 -0
- scaler/config/types/network_backend.py +12 -0
- scaler/config/types/object_storage_server.py +45 -0
- scaler/config/types/worker.py +67 -0
- scaler/config/types/zmq.py +83 -0
- scaler/entry_points/__init__.py +0 -0
- scaler/entry_points/cluster.py +10 -0
- scaler/entry_points/object_storage_server.py +26 -0
- scaler/entry_points/scheduler.py +51 -0
- scaler/entry_points/top.py +272 -0
- scaler/entry_points/webui.py +6 -0
- scaler/entry_points/worker_adapter_ecs.py +22 -0
- scaler/entry_points/worker_adapter_native.py +31 -0
- scaler/entry_points/worker_adapter_symphony.py +26 -0
- scaler/io/__init__.py +0 -0
- scaler/io/async_binder.py +89 -0
- scaler/io/async_connector.py +95 -0
- scaler/io/async_object_storage_connector.py +225 -0
- scaler/io/mixins.py +154 -0
- scaler/io/sync_connector.py +68 -0
- scaler/io/sync_object_storage_connector.py +249 -0
- scaler/io/sync_subscriber.py +83 -0
- scaler/io/utility.py +80 -0
- scaler/io/ymq/__init__.py +0 -0
- scaler/io/ymq/_ymq.pyi +95 -0
- scaler/io/ymq/_ymq.so +0 -0
- scaler/io/ymq/ymq.py +138 -0
- scaler/io/ymq_async_object_storage_connector.py +184 -0
- scaler/io/ymq_sync_object_storage_connector.py +184 -0
- scaler/object_storage/__init__.py +0 -0
- scaler/object_storage/object_storage_server.so +0 -0
- scaler/protocol/__init__.py +0 -0
- scaler/protocol/capnp/__init__.py +0 -0
- scaler/protocol/capnp/_python.py +6 -0
- scaler/protocol/capnp/common.capnp +68 -0
- scaler/protocol/capnp/message.capnp +218 -0
- scaler/protocol/capnp/object_storage.capnp +57 -0
- scaler/protocol/capnp/status.capnp +73 -0
- scaler/protocol/introduction.md +105 -0
- scaler/protocol/python/__init__.py +0 -0
- scaler/protocol/python/common.py +140 -0
- scaler/protocol/python/message.py +751 -0
- scaler/protocol/python/mixins.py +13 -0
- scaler/protocol/python/object_storage.py +118 -0
- scaler/protocol/python/status.py +279 -0
- scaler/protocol/worker.md +228 -0
- scaler/scheduler/__init__.py +0 -0
- scaler/scheduler/allocate_policy/__init__.py +0 -0
- scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
- scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
- scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
- scaler/scheduler/allocate_policy/mixins.py +55 -0
- scaler/scheduler/controllers/__init__.py +0 -0
- scaler/scheduler/controllers/balance_controller.py +65 -0
- scaler/scheduler/controllers/client_controller.py +131 -0
- scaler/scheduler/controllers/config_controller.py +31 -0
- scaler/scheduler/controllers/graph_controller.py +424 -0
- scaler/scheduler/controllers/information_controller.py +81 -0
- scaler/scheduler/controllers/mixins.py +194 -0
- scaler/scheduler/controllers/object_controller.py +147 -0
- scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
- scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
- scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
- scaler/scheduler/controllers/scaling_policies/null.py +14 -0
- scaler/scheduler/controllers/scaling_policies/types.py +9 -0
- scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
- scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
- scaler/scheduler/controllers/task_controller.py +376 -0
- scaler/scheduler/controllers/worker_controller.py +169 -0
- scaler/scheduler/object_usage/__init__.py +0 -0
- scaler/scheduler/object_usage/object_tracker.py +131 -0
- scaler/scheduler/scheduler.py +251 -0
- scaler/scheduler/task/__init__.py +0 -0
- scaler/scheduler/task/task_state_machine.py +92 -0
- scaler/scheduler/task/task_state_manager.py +61 -0
- scaler/ui/__init__.py +0 -0
- scaler/ui/common/__init__.py +0 -0
- scaler/ui/common/constants.py +9 -0
- scaler/ui/common/live_display.py +147 -0
- scaler/ui/common/memory_window.py +146 -0
- scaler/ui/common/setting_page.py +40 -0
- scaler/ui/common/task_graph.py +840 -0
- scaler/ui/common/task_log.py +111 -0
- scaler/ui/common/utility.py +66 -0
- scaler/ui/common/webui.py +80 -0
- scaler/ui/common/worker_processors.py +104 -0
- scaler/ui/v1.py +76 -0
- scaler/ui/v2.py +102 -0
- scaler/ui/webui.py +21 -0
- scaler/utility/__init__.py +0 -0
- scaler/utility/debug.py +19 -0
- scaler/utility/event_list.py +63 -0
- scaler/utility/event_loop.py +58 -0
- scaler/utility/exceptions.py +42 -0
- scaler/utility/formatter.py +44 -0
- scaler/utility/graph/__init__.py +0 -0
- scaler/utility/graph/optimization.py +27 -0
- scaler/utility/graph/topological_sorter.py +11 -0
- scaler/utility/graph/topological_sorter_graphblas.py +174 -0
- scaler/utility/identifiers.py +107 -0
- scaler/utility/logging/__init__.py +0 -0
- scaler/utility/logging/decorators.py +25 -0
- scaler/utility/logging/scoped_logger.py +33 -0
- scaler/utility/logging/utility.py +183 -0
- scaler/utility/many_to_many_dict.py +123 -0
- scaler/utility/metadata/__init__.py +0 -0
- scaler/utility/metadata/profile_result.py +31 -0
- scaler/utility/metadata/task_flags.py +30 -0
- scaler/utility/mixins.py +13 -0
- scaler/utility/network_util.py +7 -0
- scaler/utility/one_to_many_dict.py +72 -0
- scaler/utility/queues/__init__.py +0 -0
- scaler/utility/queues/async_indexed_queue.py +37 -0
- scaler/utility/queues/async_priority_queue.py +70 -0
- scaler/utility/queues/async_sorted_priority_queue.py +45 -0
- scaler/utility/queues/indexed_queue.py +114 -0
- scaler/utility/serialization.py +9 -0
- scaler/version.txt +1 -0
- scaler/worker/__init__.py +0 -0
- scaler/worker/agent/__init__.py +0 -0
- scaler/worker/agent/heartbeat_manager.py +110 -0
- scaler/worker/agent/mixins.py +137 -0
- scaler/worker/agent/processor/__init__.py +0 -0
- scaler/worker/agent/processor/object_cache.py +107 -0
- scaler/worker/agent/processor/processor.py +285 -0
- scaler/worker/agent/processor/streaming_buffer.py +28 -0
- scaler/worker/agent/processor_holder.py +147 -0
- scaler/worker/agent/processor_manager.py +369 -0
- scaler/worker/agent/profiling_manager.py +109 -0
- scaler/worker/agent/task_manager.py +150 -0
- scaler/worker/agent/timeout_manager.py +19 -0
- scaler/worker/preload.py +84 -0
- scaler/worker/worker.py +265 -0
- scaler/worker_adapter/__init__.py +0 -0
- scaler/worker_adapter/common.py +26 -0
- scaler/worker_adapter/ecs.py +241 -0
- scaler/worker_adapter/native.py +138 -0
- scaler/worker_adapter/symphony/__init__.py +0 -0
- scaler/worker_adapter/symphony/callback.py +45 -0
- scaler/worker_adapter/symphony/heartbeat_manager.py +82 -0
- scaler/worker_adapter/symphony/message.py +24 -0
- scaler/worker_adapter/symphony/task_manager.py +289 -0
- scaler/worker_adapter/symphony/worker.py +204 -0
- scaler/worker_adapter/symphony/worker_adapter.py +123 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
from collections import deque
|
|
3
|
+
from threading import Lock
|
|
4
|
+
from typing import Deque, Dict, Optional
|
|
5
|
+
|
|
6
|
+
from nicegui import ui
|
|
7
|
+
|
|
8
|
+
from scaler.protocol.python.common import TaskState
|
|
9
|
+
from scaler.protocol.python.message import StateTask, StateWorker
|
|
10
|
+
from scaler.ui.common.utility import COMPLETED_TASK_STATUSES, display_capabilities
|
|
11
|
+
from scaler.ui.util import NICEGUI_MAJOR_VERSION
|
|
12
|
+
from scaler.utility.formatter import format_bytes
|
|
13
|
+
from scaler.utility.metadata.profile_result import ProfileResult
|
|
14
|
+
|
|
15
|
+
# HTML for one truncated, click-to-copy task-id cell. {task} is substituted
# three times: as the hover tooltip, as the clipboard payload, and as the
# visible (ellipsized) text.
TASK_ID_HTML_TEMPLATE = (
    "<span "
    "style='display:inline-block;max-width:12rem;overflow:hidden;text-overflow:ellipsis;"
    "white-space:nowrap;cursor:pointer;font:inherit;color:inherit' "
    "title='{task}' onclick=\"navigator.clipboard.writeText('{task}')\">{task}</span>"
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclasses.dataclass
class TaskData:
    """One rendered row of the task-log table (all cells pre-formatted strings)."""

    task: str = ""
    function: str = ""
    duration: str = ""
    peak_mem: str = ""
    status: str = ""
    capabilities: str = ""

    def populate(
        self,
        state: StateTask,
        function_name: str,
        profiling_data: Optional[ProfileResult],
        task_capabilities: Dict[str, int],
    ):
        """Fill every cell of this row from a task state update."""
        self.task = f"{state.task_id.hex()}"
        self.function = function_name
        self.status = state.state.name

        if profiling_data:
            self.duration = f"{profiling_data.duration_s:.2f}s"
            peak = profiling_data.memory_peak
            self.peak_mem = "0" if peak == 0 else format_bytes(peak)
        else:
            # no profiling metadata available (e.g. canceled/failed tasks)
            self.duration = "N/A"
            self.peak_mem = "N/A"

        self.capabilities = display_capabilities(set(task_capabilities.keys()))

    def draw_row(self):
        """Render this row's six cells into the current UI container."""
        succeeded = self.status == TaskState.Success.name
        color = "color: green" if succeeded else "color: red"

        if NICEGUI_MAJOR_VERSION < 3:
            ui.html(TASK_ID_HTML_TEMPLATE.format(task=self.task))
        else:
            # NiceGUI >= 3 sanitizes raw HTML by default; the template is trusted
            ui.html(TASK_ID_HTML_TEMPLATE.format(task=self.task), sanitize=False)  # type: ignore[call-arg]

        for cell in (self.function, self.duration, self.peak_mem):
            ui.label(cell)
        ui.label(self.status).style(color)
        ui.label(self.capabilities)

    @staticmethod
    def draw_titles():
        """Render the header cells (order must match draw_row)."""
        for title in ("Task ID", "Function", "Duration", "Peak mem", "Status", "Capabilities"):
            ui.label(title)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class TaskLogTable:
    """Rolling log of the most recently completed tasks, newest first."""

    def __init__(self):
        # bounded deque: rows older than the last 100 fall off automatically
        self._task_log: Deque[TaskData] = deque(maxlen=100)
        self._task_id_to_function_name: Dict[str, str] = {}
        self._lock: Lock = Lock()

    def handle_task_state(self, state_task: StateTask):
        """Record *state_task*; tasks reaching a terminal state become a log row."""
        task_hex = state_task.task_id.hex()

        # remember the function name from early updates: the terminal update may omit it
        if state_task.function_name != b"" and task_hex not in self._task_id_to_function_name:
            self._task_id_to_function_name[task_hex] = state_task.function_name.decode()

        if state_task.state not in COMPLETED_TASK_STATUSES:
            return

        function_name = state_task.function_name.decode() or self._task_id_to_function_name.pop(task_hex, "")

        # Canceled/failed states don't have profiling metadata
        profiling_data = None
        if state_task.metadata != b"":
            profiling_data = ProfileResult.deserialize(state_task.metadata)

        row = TaskData()
        row.populate(state_task, function_name, profiling_data, state_task.capabilities)

        with self._lock:
            self._task_log.appendleft(row)

    def handle_worker_state(self, _: StateWorker):
        # worker updates carry nothing relevant to the task log
        return

    @ui.refreshable
    def draw_section(self):
        """Draw the whole task-log grid; refreshed periodically by a UI timer."""
        with self._lock, ui.card().classes("w-full q-mx-auto"), ui.grid(columns=6).classes("q-mx-auto"):
            TaskData.draw_titles()
            for row in self._task_log:
                row.draw_row()
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import List, Set, Tuple
|
|
3
|
+
|
|
4
|
+
from scaler.protocol.python.common import TaskState
|
|
5
|
+
from scaler.ui.common.setting_page import Settings
|
|
6
|
+
|
|
7
|
+
# Terminal task states: once a task reaches one of these it receives no further
# state updates, so it can be logged / cleaned up by consumers.
COMPLETED_TASK_STATUSES = {
    TaskState.Success,
    TaskState.Canceled,
    TaskState.CanceledNotFound,
    TaskState.Failed,
    TaskState.FailedWorkerDied,
}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def format_timediff(a: datetime.datetime, b: datetime.datetime) -> float:
    """Return the signed number of seconds from *a* to *b* (negative if b < a)."""
    delta = b - a
    return delta.total_seconds()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def format_worker_name(worker_name: str, cutoff: int = 15) -> str:
    """Truncate *worker_name* to *cutoff* characters, appending "+" when shortened."""
    if len(worker_name) <= cutoff:
        return worker_name
    return worker_name[:cutoff] + "+"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_bounds(now: datetime.datetime, start_time: datetime.datetime, settings: Settings) -> Tuple[int, int]:
    """Compute the (lower, upper) second offsets of the visible stream window.

    Offsets are relative to *start_time*; the window spans
    ``settings.stream_window`` and its lower bound is clamped to 0 while total
    uptime is still shorter than the window.
    """
    upper_range = now - start_time
    lower_range = upper_range - settings.stream_window

    # NOTE(review): timedelta.seconds ignores the .days component (wraps at 24h) —
    # presumably uptime here stays under a day; confirm, else total_seconds() is needed.
    bound_upper_seconds = max(upper_range.seconds, settings.stream_window.seconds)
    # while uptime <= window, the upper bound equals the window and lower pins at 0
    bound_lower_seconds = 0 if bound_upper_seconds == settings.stream_window.seconds else lower_range.seconds

    return bound_lower_seconds, bound_upper_seconds
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def make_taskstream_ticks(lower_bound: int, upper_bound: int) -> List[int]:
    """Return evenly spaced axis tick positions between the two bounds.

    Ticks run from *lower_bound* to *upper_bound* inclusive in (up to) six
    equal steps.
    """
    # guard: a span below 6 would make the step 0 and range() raise ValueError
    distance = max(1, (upper_bound - lower_bound) // 6)
    return list(range(lower_bound, upper_bound + 1, distance))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def make_memory_ticks(max_bytes: int) -> Tuple[List[int], List[str]]:
    """Build log-scale tick positions and labels ("1B", "1KB", ...) covering *max_bytes*."""
    # always cover at least 1GB so an empty chart still gets a sensible axis
    target = max(1024 * 1024 * 1024, max_bytes)

    vals: List[int] = [0]
    texts: List[str] = ["0"]
    value = 1
    for unit in ["B", "KB", "MB", "GB", "TB"]:
        if value > target:
            break
        vals.append(value)
        texts.append(f"1{unit}")
        value *= 1024
    return vals, texts
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def make_tick_text(window_length: int) -> List[int]:
    """Return tick positions for a trailing window: -window_length .. 0 in (up to) six steps."""
    upper = 0
    lower = -1 * window_length
    # guard: windows shorter than 6 would make the step 0 and range() raise ValueError
    distance = max(1, (upper - lower) // 6)
    return list(range(lower, upper + 1, distance))
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def display_capabilities(capabilities: Set[str]) -> str:
    """Render a capability set as a stable, human-readable string.

    Capabilities are sorted so the output is deterministic; an empty (or
    falsy) set yields a placeholder.
    """
    # an empty set is already falsy — the original `len(...) == 0` check was redundant
    if not capabilities:
        return "<no capabilities>"

    return " & ".join(sorted(capabilities))
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
from scaler.protocol.python.message import StateBalanceAdvice, StateScheduler, StateTask, StateWorker
|
|
5
|
+
from scaler.protocol.python.mixins import Message
|
|
6
|
+
from scaler.ui.common.live_display import SchedulerSection, WorkersSection
|
|
7
|
+
from scaler.ui.common.memory_window import MemoryChart
|
|
8
|
+
from scaler.ui.common.setting_page import Settings
|
|
9
|
+
from scaler.ui.common.task_graph import TaskStream
|
|
10
|
+
from scaler.ui.common.task_log import TaskLogTable
|
|
11
|
+
from scaler.ui.common.worker_processors import WorkerProcessors
|
|
12
|
+
from scaler.utility.formatter import format_bytes, format_percentage
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclasses.dataclass
class Sections:
    """Bundle of every UI section that consumes scheduler monitor messages."""

    scheduler_section: SchedulerSection
    workers_section: WorkersSection
    task_stream_section: TaskStream
    memory_usage_section: MemoryChart
    tasklog_section: TaskLogTable
    worker_processors: WorkerProcessors
    settings_section: Settings
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def process_scheduler_message(status: Message, tables: Sections):
    """Dispatch one scheduler monitor message to every interested UI section."""
    if isinstance(status, StateScheduler):
        __update_scheduler_state(status, tables)
        return

    # every section implements handle_worker_state / handle_task_state
    sections = (
        tables.scheduler_section,
        tables.workers_section,
        tables.task_stream_section,
        tables.memory_usage_section,
        tables.tasklog_section,
        tables.worker_processors,
        tables.settings_section,
    )

    if isinstance(status, StateWorker):
        logging.info(f"Received StateWorker update for worker {status.worker_id.decode()} with {status.state.name}")
        for section in sections:
            section.handle_worker_state(status)
        return

    if isinstance(status, StateTask):
        logging.debug(f"Received StateTask update for task {status.task_id.hex()} with {status.state.name}")
        for section in sections:
            section.handle_task_state(status)
        return

    if isinstance(status, StateBalanceAdvice):
        logging.debug(f"Received StateBalanceAdvice for {status.worker_id.decode()} with {len(status.task_ids)} tasks")
        return

    logging.info(f"Unhandled message received: {type(status)}")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def __update_scheduler_state(data: StateScheduler, tables: Sections):
    """Apply a full StateScheduler snapshot to the scheduler/worker sections."""
    tables.scheduler_section.cpu = format_percentage(data.scheduler.cpu)
    tables.scheduler_section.rss = format_bytes(data.scheduler.rss)
    tables.scheduler_section.rss_free = format_bytes(data.rss_free)

    previous_workers = set(tables.workers_section.workers.keys())
    current_workers = set(worker_data.worker_id.decode() for worker_data in data.worker_manager.workers)

    for worker_data in data.worker_manager.workers:
        worker_name = worker_data.worker_id.decode()
        # NOTE(review): new worker names are indexed without an explicit insert —
        # presumably workers is a defaultdict; confirm in WorkersSection.
        tables.workers_section.workers[worker_name].populate(worker_data)

    # drop every worker that disappeared from this snapshot
    for died_worker in previous_workers - current_workers:
        tables.workers_section.workers.pop(died_worker)
        tables.worker_processors.remove_worker(died_worker)
        tables.task_stream_section.mark_dead_worker(died_worker)

    # membership changed: redraw the workers section from scratch
    if previous_workers != current_workers:
        tables.workers_section.draw_section.refresh()

    tables.worker_processors.update_data(data.worker_manager.workers)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
from threading import Lock
|
|
3
|
+
from typing import Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
from nicegui import ui
|
|
6
|
+
from nicegui.element import Element
|
|
7
|
+
|
|
8
|
+
from scaler.protocol.python.common import WorkerState
|
|
9
|
+
from scaler.protocol.python.message import StateTask, StateWorker
|
|
10
|
+
from scaler.protocol.python.status import ProcessorStatus, WorkerStatus
|
|
11
|
+
from scaler.ui.common.utility import format_worker_name
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclasses.dataclass
class WorkerProcessors:
    """Per-worker processor tables plus the lock guarding the mapping."""

    workers: Dict[str, "WorkerProcessorTable"] = dataclasses.field(default_factory=dict)
    # default_factory: a plain `Lock()` class-level default would be shared by
    # every instance of this dataclass (the classic mutable-default pitfall)
    _lock: Lock = dataclasses.field(default_factory=Lock)

    @ui.refreshable
    def draw_section(self):
        """Draw one processor table per known worker."""
        with self._lock:
            for processor_table in self.workers.values():
                processor_table.draw_table()

    def update_data(self, data: List[WorkerStatus]):
        """Create a table for each new worker and refresh changed processor lists."""
        with self._lock:
            for worker in data:
                worker_name = worker.worker_id.decode()
                processor_table = self.workers.get(worker_name)

                if processor_table is None:
                    processor_table = WorkerProcessorTable(worker_name, 0, worker.rss_free, worker.processor_statuses)
                    self.workers[worker_name] = processor_table
                elif processor_table.processor_statuses != worker.processor_statuses:
                    processor_table.processor_statuses = worker.processor_statuses

    def remove_worker(self, dead_worker: str):
        """Forget *dead_worker*'s table (no-op when unknown)."""
        with self._lock:
            self.workers.pop(dead_worker, None)

    def handle_task_state(self, _: StateTask):
        # task updates are irrelevant to the processor view
        return

    def handle_worker_state(self, state_worker: StateWorker):
        """Drop a worker's table as soon as it disconnects."""
        if state_worker.state == WorkerState.Disconnected:
            self.remove_worker(state_worker.worker_id.decode())
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclasses.dataclass
class WorkerProcessorTable:
    """Card showing one worker's processors as a 7-column grid of knobs/checkboxes."""

    worker_name: str
    rss_max: int  # high-water mark of any processor's RSS observed so far
    rss_free: int
    processor_statuses: List[ProcessorStatus]

    handler: Optional[Element] = dataclasses.field(default=None)

    def draw_table(self):
        """Render this worker's card, updating the RSS high-water mark as rows draw."""
        with ui.card().classes("w-full") as handler:
            self.handler = handler

            ui.markdown(f"Worker **{format_worker_name(self.worker_name)}**").classes("text-xl")

            with ui.grid(columns=7).classes("w-full"):
                self.draw_titles()
                for processor in sorted(self.processor_statuses, key=lambda status: status.pid):
                    self.rss_max = max(self.rss_max, processor.resource.rss)
                    self.draw_row(processor, self.rss_free, self.rss_max)

    @staticmethod
    def draw_titles():
        """Header cells; order must match draw_row."""
        titles = ("Processor PID", "CPU %", "RSS (in MB)", "Max RSS (in MB)", "Initialized", "Has Task", "Suspended")
        for title in titles:
            ui.label(title)

    @staticmethod
    def draw_row(processor_status: ProcessorStatus, rss_free: int, rss_max: int):
        """Render one processor's cells; byte counts are shown in MB."""
        cpu_percent = processor_status.resource.cpu / 10
        rss_mb = int(processor_status.resource.rss / 1e6)
        rss_max_mb = int(rss_max / 1e6)
        rss_free_mb = int(rss_free / 1e6)

        ui.label(str(processor_status.pid))
        ui.knob(value=cpu_percent, track_color="grey-2", show_value=True, min=0, max=100)
        ui.knob(value=rss_mb, track_color="grey-2", show_value=True, min=0, max=rss_mb + rss_free_mb)
        ui.knob(value=rss_max_mb, track_color="grey-2", show_value=True, min=0, max=rss_mb + rss_free_mb)
        ui.checkbox().bind_value_from(processor_status, "initialized")
        ui.checkbox().bind_value_from(processor_status, "has_task")
        ui.checkbox().bind_value_from(processor_status, "suspended")

    def delete_row(self):
        """Remove this card from the page."""
        assert self.handler is not None
        self.handler.clear()
        self.handler.delete()
|
scaler/ui/v1.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
from functools import partial
|
|
3
|
+
|
|
4
|
+
from nicegui import ui
|
|
5
|
+
|
|
6
|
+
from scaler.config.section.webui import WebUIConfig
|
|
7
|
+
from scaler.io.sync_subscriber import ZMQSyncSubscriber
|
|
8
|
+
from scaler.ui.common.constants import (
|
|
9
|
+
MEMORY_USAGE_UPDATE_INTERVAL,
|
|
10
|
+
TASK_LOG_REFRESH_INTERVAL,
|
|
11
|
+
TASK_STREAM_UPDATE_INTERVAL,
|
|
12
|
+
WORKER_PROCESSORS_REFRESH_INTERVAL,
|
|
13
|
+
)
|
|
14
|
+
from scaler.ui.common.live_display import SchedulerSection, WorkersSection
|
|
15
|
+
from scaler.ui.common.memory_window import MemoryChart
|
|
16
|
+
from scaler.ui.common.setting_page import Settings
|
|
17
|
+
from scaler.ui.common.task_graph import TaskStream
|
|
18
|
+
from scaler.ui.common.task_log import TaskLogTable
|
|
19
|
+
from scaler.ui.common.webui import Sections, process_scheduler_message
|
|
20
|
+
from scaler.ui.common.worker_processors import WorkerProcessors
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def start_webui_v1(config: WebUIConfig):
    """Build the legacy (NiceGUI < 3) dashboard and serve it until shutdown.

    Wires all UI sections into a Sections bundle, subscribes to the
    scheduler's monitor feed, and blocks on the NiceGUI server thread.
    """
    tables = Sections(
        scheduler_section=SchedulerSection(),
        workers_section=WorkersSection(),
        task_stream_section=TaskStream(),
        memory_usage_section=MemoryChart(),
        tasklog_section=TaskLogTable(),
        worker_processors=WorkerProcessors(),
        settings_section=Settings(),
    )

    with ui.tabs().classes("w-full h-full") as tabs:
        live_tab = ui.tab("Live")
        tasklog_tab = ui.tab("Task Log")
        stream_tab = ui.tab("Worker Task Stream")
        worker_processors_tab = ui.tab("Worker Processors")
        settings_tab = ui.tab("Settings")

    with ui.tab_panels(tabs, value=live_tab).classes("w-full"):
        with ui.tab_panel(live_tab):
            tables.scheduler_section.draw_section()
            tables.workers_section.draw_section()  # type: ignore[call-arg]

        with ui.tab_panel(tasklog_tab):
            tables.tasklog_section.draw_section()  # type: ignore[call-arg]
            # periodic redraws: sections accumulate state between refreshes
            ui.timer(TASK_LOG_REFRESH_INTERVAL, tables.tasklog_section.draw_section.refresh, active=True)

        with ui.tab_panel(stream_tab):
            tables.task_stream_section.setup_task_stream(tables.settings_section)
            ui.timer(TASK_STREAM_UPDATE_INTERVAL, tables.task_stream_section.update_plot, active=True)

            tables.memory_usage_section.setup_memory_chart(tables.settings_section)
            ui.timer(MEMORY_USAGE_UPDATE_INTERVAL, tables.memory_usage_section.update_plot, active=True)

        with ui.tab_panel(worker_processors_tab):
            tables.worker_processors.draw_section()  # type: ignore[call-arg]
            ui.timer(WORKER_PROCESSORS_REFRESH_INTERVAL, tables.worker_processors.draw_section.refresh, active=True)

        with ui.tab_panel(settings_tab):
            tables.settings_section.draw_section()

    # fan every monitor message into process_scheduler_message on the subscriber thread
    subscriber = ZMQSyncSubscriber(
        address=config.monitor_address,
        callback=partial(process_scheduler_message, tables=tables),
        topic=b"",
        timeout_seconds=-1,  # never time out; the monitor feed is continuous
    )
    subscriber.start()

    # run the NiceGUI server on its own (non-daemon) thread and block until it exits
    ui_thread = threading.Thread(
        target=partial(ui.run, host=config.web_host, port=config.web_port, reload=False), daemon=False
    )
    ui_thread.start()
    ui_thread.join()
|
scaler/ui/v2.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from nicegui import Event, app, ui # type: ignore[attr-defined]
|
|
5
|
+
|
|
6
|
+
from scaler.config.section.webui import WebUIConfig
|
|
7
|
+
from scaler.io.sync_subscriber import ZMQSyncSubscriber
|
|
8
|
+
from scaler.protocol.python.mixins import Message
|
|
9
|
+
from scaler.ui.common.constants import (
|
|
10
|
+
MEMORY_USAGE_UPDATE_INTERVAL,
|
|
11
|
+
TASK_LOG_REFRESH_INTERVAL,
|
|
12
|
+
TASK_STREAM_UPDATE_INTERVAL,
|
|
13
|
+
WORKER_PROCESSORS_REFRESH_INTERVAL,
|
|
14
|
+
)
|
|
15
|
+
from scaler.ui.common.live_display import SchedulerSection, WorkersSection
|
|
16
|
+
from scaler.ui.common.memory_window import MemoryChart
|
|
17
|
+
from scaler.ui.common.setting_page import Settings
|
|
18
|
+
from scaler.ui.common.task_graph import TaskStream
|
|
19
|
+
from scaler.ui.common.task_log import TaskLogTable
|
|
20
|
+
from scaler.ui.common.webui import Sections, process_scheduler_message
|
|
21
|
+
from scaler.ui.common.worker_processors import WorkerProcessors
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class WebUI:
|
|
25
|
+
def __init__(self) -> None:
|
|
26
|
+
self.scheduler_message = Event[Message]()
|
|
27
|
+
self.tables: Optional[Sections] = None
|
|
28
|
+
|
|
29
|
+
def start(self, host: str, port: int) -> None:
|
|
30
|
+
"""Start the NiceGUI server in a separate thread."""
|
|
31
|
+
started = threading.Event()
|
|
32
|
+
app.on_startup(started.set)
|
|
33
|
+
thread = threading.Thread(
|
|
34
|
+
target=lambda: ui.run(self.root, host=host, port=port, reload=False), # type: ignore[misc,arg-type]
|
|
35
|
+
daemon=True,
|
|
36
|
+
)
|
|
37
|
+
thread.start()
|
|
38
|
+
if not started.wait(timeout=3.0):
|
|
39
|
+
raise RuntimeError("NiceGUI did not start within 3 seconds.")
|
|
40
|
+
|
|
41
|
+
def root(self) -> None:
|
|
42
|
+
"""Create the UI for each new visitor."""
|
|
43
|
+
self.scheduler_message.subscribe(self.handle_message)
|
|
44
|
+
tables = Sections(
|
|
45
|
+
scheduler_section=SchedulerSection(),
|
|
46
|
+
workers_section=WorkersSection(),
|
|
47
|
+
task_stream_section=TaskStream(),
|
|
48
|
+
memory_usage_section=MemoryChart(),
|
|
49
|
+
tasklog_section=TaskLogTable(),
|
|
50
|
+
worker_processors=WorkerProcessors(),
|
|
51
|
+
settings_section=Settings(),
|
|
52
|
+
)
|
|
53
|
+
self.tables = tables
|
|
54
|
+
|
|
55
|
+
with ui.tabs().classes("w-full h-full") as tabs:
|
|
56
|
+
live_tab = ui.tab("Live")
|
|
57
|
+
tasklog_tab = ui.tab("Task Log")
|
|
58
|
+
stream_tab = ui.tab("Worker Task Stream")
|
|
59
|
+
worker_processors_tab = ui.tab("Worker Processors")
|
|
60
|
+
settings_tab = ui.tab("Settings")
|
|
61
|
+
|
|
62
|
+
with ui.tab_panels(tabs, value=live_tab).classes("w-full"):
|
|
63
|
+
with ui.tab_panel(live_tab):
|
|
64
|
+
tables.scheduler_section.draw_section()
|
|
65
|
+
tables.workers_section.draw_section() # type: ignore[call-arg]
|
|
66
|
+
|
|
67
|
+
with ui.tab_panel(tasklog_tab):
|
|
68
|
+
tables.tasklog_section.draw_section() # type: ignore[call-arg]
|
|
69
|
+
ui.timer(TASK_LOG_REFRESH_INTERVAL, tables.tasklog_section.draw_section.refresh, active=True)
|
|
70
|
+
|
|
71
|
+
with ui.tab_panel(stream_tab):
|
|
72
|
+
tables.task_stream_section.setup_task_stream(tables.settings_section)
|
|
73
|
+
ui.timer(TASK_STREAM_UPDATE_INTERVAL, tables.task_stream_section.update_plot, active=True)
|
|
74
|
+
|
|
75
|
+
tables.memory_usage_section.setup_memory_chart(tables.settings_section)
|
|
76
|
+
ui.timer(MEMORY_USAGE_UPDATE_INTERVAL, tables.memory_usage_section.update_plot, active=True)
|
|
77
|
+
|
|
78
|
+
with ui.tab_panel(worker_processors_tab):
|
|
79
|
+
tables.worker_processors.draw_section() # type: ignore[call-arg]
|
|
80
|
+
ui.timer(WORKER_PROCESSORS_REFRESH_INTERVAL, tables.worker_processors.draw_section.refresh, active=True)
|
|
81
|
+
|
|
82
|
+
with ui.tab_panel(settings_tab):
|
|
83
|
+
tables.settings_section.draw_section()
|
|
84
|
+
|
|
85
|
+
def new_message(self, status: Message):
    """Relay an incoming scheduler status onto the UI's event emitter."""
    emitter = self.scheduler_message
    emitter.emit(status)
|
|
87
|
+
|
|
88
|
+
def handle_message(self, status: Message):
    """Apply a scheduler status message to every dashboard table."""
    tables = self.tables
    process_scheduler_message(status, tables)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def start_webui_v2(config: WebUIConfig):
    """Start the NiceGUI v2 dashboard and feed it scheduler monitor messages.

    Args:
        config: web UI settings — the host/port to serve the dashboard on and
            the scheduler monitor address to subscribe to.

    This function never returns: after wiring up the subscriber it blocks
    forever to keep the process alive.
    """
    import time  # local import: file's import header is maintained elsewhere

    webui = WebUI()
    webui.start(config.web_host, config.web_port)

    # Subscribe to every topic (b"") on the scheduler's monitor socket and
    # forward each message into the UI; timeout -1 means wait indefinitely.
    subscriber = ZMQSyncSubscriber(
        address=config.monitor_address, callback=webui.new_message, topic=b"", timeout_seconds=-1
    )
    subscriber.start()

    # Keep the main thread alive. The previous `while True: pass` busy-loop
    # pinned a CPU core at 100%; an idle sleep is behaviorally equivalent.
    while True:
        time.sleep(1)
|
scaler/ui/webui.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from scaler.config.section.webui import WebUIConfig
|
|
4
|
+
from scaler.ui.util import NICEGUI_MAJOR_VERSION
|
|
5
|
+
from scaler.utility.logging.utility import setup_logger
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def start_webui(config: WebUIConfig):
    """Launch the web dashboard, selecting the GUI implementation that
    matches the installed NiceGUI major release.
    """
    log_config = config.logging_config
    setup_logger(log_config.paths, log_config.config_file, log_config.level)

    # Import the chosen implementation lazily so only one variant is loaded.
    if NICEGUI_MAJOR_VERSION >= 3:
        logging.info(f"Detected {NICEGUI_MAJOR_VERSION}. Using GUI v2.")
        from scaler.ui.v2 import start_webui_v2

        start_webui_v2(config)
        return

    logging.info(f"Detected {NICEGUI_MAJOR_VERSION}. Using GUI v1.")
    from scaler.ui.v1 import start_webui_v1

    start_webui_v1(config)
|
|
File without changes
|
scaler/utility/debug.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import pdb
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Callable
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def pdb_wrapped(func: Callable):
|
|
8
|
+
@functools.wraps(func)
|
|
9
|
+
def pdb_wrapper(*args, **kwargs):
|
|
10
|
+
try:
|
|
11
|
+
exit_code = func(*args, **kwargs)
|
|
12
|
+
sys.exit(exit_code)
|
|
13
|
+
|
|
14
|
+
except Exception:
|
|
15
|
+
ex_type, value, tb = sys.exc_info()
|
|
16
|
+
pdb.post_mortem(tb)
|
|
17
|
+
raise
|
|
18
|
+
|
|
19
|
+
return pdb_wrapper
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
from typing import Callable
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class EventList(collections.UserList):
    """A list that emits events when it is modified.

    Observers registered via :meth:`add_update_callback` are invoked
    synchronously, with the list itself as the sole argument, after every
    in-place mutation (item assignment/deletion, append, insert, pop,
    remove, clear, sort, reverse, extend, ``+=``).  Non-mutating operations
    such as ``+`` do not notify.
    """

    def __init__(self, initlist=None):
        super().__init__(initlist=initlist)
        # Populating the initial contents is not a mutation, so no callback
        # fires during construction.
        self._callbacks = []

    def add_update_callback(self, callback: Callable[["EventList"], None]):
        """Register *callback* to run (with this list) after each mutation."""
        self._callbacks.append(callback)

    def __setitem__(self, i, item):
        super().__setitem__(i, item)
        self._list_updated()

    def __delitem__(self, i):
        super().__delitem__(i)
        self._list_updated()

    def __add__(self, other):
        # ``+`` builds and returns a NEW list; ``self`` is untouched, so no
        # event is emitted.  (Fixes the previous version, which discarded
        # the result, notified spuriously, and implicitly returned None —
        # breaking ``a + b``.)
        return super().__add__(other)

    def __iadd__(self, other):
        super().__iadd__(other)
        self._list_updated()
        return self

    def append(self, item):
        super().append(item)
        self._list_updated()

    def insert(self, i, item):
        super().insert(i, item)
        self._list_updated()

    def pop(self, i: int = -1):
        value = super().pop(i)
        self._list_updated()
        return value

    def remove(self, item):
        super().remove(item)
        self._list_updated()

    def clear(self) -> None:
        super().clear()
        self._list_updated()

    def sort(self, /, *args, **kwargs):
        super().sort(*args, **kwargs)
        self._list_updated()

    def reverse(self) -> None:
        # Consistency fix: reverse() mutates in place, so it now notifies
        # like every other mutator.
        super().reverse()
        self._list_updated()

    def extend(self, other) -> None:
        super().extend(other)
        self._list_updated()

    def _list_updated(self):
        # Notify observers in registration order.
        for callback in self._callbacks:
            callback(self)