opengris-scaler 1.12.28__cp313-cp313-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of opengris-scaler might be problematic. Click here for more details.
- opengris_scaler-1.12.28.dist-info/METADATA +728 -0
- opengris_scaler-1.12.28.dist-info/RECORD +187 -0
- opengris_scaler-1.12.28.dist-info/WHEEL +5 -0
- opengris_scaler-1.12.28.dist-info/entry_points.txt +10 -0
- opengris_scaler-1.12.28.dist-info/licenses/LICENSE +201 -0
- opengris_scaler-1.12.28.dist-info/licenses/LICENSE.spdx +7 -0
- opengris_scaler-1.12.28.dist-info/licenses/NOTICE +8 -0
- opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
- opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
- opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
- opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
- scaler/__init__.py +14 -0
- scaler/about.py +5 -0
- scaler/client/__init__.py +0 -0
- scaler/client/agent/__init__.py +0 -0
- scaler/client/agent/client_agent.py +210 -0
- scaler/client/agent/disconnect_manager.py +27 -0
- scaler/client/agent/future_manager.py +112 -0
- scaler/client/agent/heartbeat_manager.py +74 -0
- scaler/client/agent/mixins.py +89 -0
- scaler/client/agent/object_manager.py +98 -0
- scaler/client/agent/task_manager.py +64 -0
- scaler/client/client.py +658 -0
- scaler/client/future.py +252 -0
- scaler/client/object_buffer.py +129 -0
- scaler/client/object_reference.py +25 -0
- scaler/client/serializer/__init__.py +0 -0
- scaler/client/serializer/default.py +16 -0
- scaler/client/serializer/mixins.py +38 -0
- scaler/cluster/__init__.py +0 -0
- scaler/cluster/cluster.py +115 -0
- scaler/cluster/combo.py +150 -0
- scaler/cluster/object_storage_server.py +45 -0
- scaler/cluster/scheduler.py +86 -0
- scaler/config/__init__.py +0 -0
- scaler/config/defaults.py +94 -0
- scaler/config/loader.py +96 -0
- scaler/config/mixins.py +20 -0
- scaler/config/section/__init__.py +0 -0
- scaler/config/section/cluster.py +55 -0
- scaler/config/section/ecs_worker_adapter.py +85 -0
- scaler/config/section/native_worker_adapter.py +43 -0
- scaler/config/section/object_storage_server.py +8 -0
- scaler/config/section/scheduler.py +54 -0
- scaler/config/section/symphony_worker_adapter.py +47 -0
- scaler/config/section/top.py +13 -0
- scaler/config/section/webui.py +21 -0
- scaler/config/types/__init__.py +0 -0
- scaler/config/types/network_backend.py +12 -0
- scaler/config/types/object_storage_server.py +45 -0
- scaler/config/types/worker.py +62 -0
- scaler/config/types/zmq.py +83 -0
- scaler/entry_points/__init__.py +0 -0
- scaler/entry_points/cluster.py +133 -0
- scaler/entry_points/object_storage_server.py +45 -0
- scaler/entry_points/scheduler.py +144 -0
- scaler/entry_points/top.py +286 -0
- scaler/entry_points/webui.py +48 -0
- scaler/entry_points/worker_adapter_ecs.py +191 -0
- scaler/entry_points/worker_adapter_native.py +137 -0
- scaler/entry_points/worker_adapter_symphony.py +98 -0
- scaler/io/__init__.py +0 -0
- scaler/io/async_binder.py +89 -0
- scaler/io/async_connector.py +95 -0
- scaler/io/async_object_storage_connector.py +225 -0
- scaler/io/mixins.py +154 -0
- scaler/io/sync_connector.py +68 -0
- scaler/io/sync_object_storage_connector.py +247 -0
- scaler/io/sync_subscriber.py +83 -0
- scaler/io/utility.py +80 -0
- scaler/io/ymq/__init__.py +0 -0
- scaler/io/ymq/_ymq.pyi +95 -0
- scaler/io/ymq/ymq.py +138 -0
- scaler/io/ymq_async_object_storage_connector.py +184 -0
- scaler/io/ymq_sync_object_storage_connector.py +184 -0
- scaler/object_storage/__init__.py +0 -0
- scaler/protocol/__init__.py +0 -0
- scaler/protocol/capnp/__init__.py +0 -0
- scaler/protocol/capnp/_python.py +6 -0
- scaler/protocol/capnp/common.capnp +68 -0
- scaler/protocol/capnp/message.capnp +218 -0
- scaler/protocol/capnp/object_storage.capnp +57 -0
- scaler/protocol/capnp/status.capnp +73 -0
- scaler/protocol/introduction.md +105 -0
- scaler/protocol/python/__init__.py +0 -0
- scaler/protocol/python/common.py +140 -0
- scaler/protocol/python/message.py +751 -0
- scaler/protocol/python/mixins.py +13 -0
- scaler/protocol/python/object_storage.py +118 -0
- scaler/protocol/python/status.py +279 -0
- scaler/protocol/worker.md +228 -0
- scaler/scheduler/__init__.py +0 -0
- scaler/scheduler/allocate_policy/__init__.py +0 -0
- scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
- scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
- scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
- scaler/scheduler/allocate_policy/mixins.py +55 -0
- scaler/scheduler/controllers/__init__.py +0 -0
- scaler/scheduler/controllers/balance_controller.py +65 -0
- scaler/scheduler/controllers/client_controller.py +131 -0
- scaler/scheduler/controllers/config_controller.py +31 -0
- scaler/scheduler/controllers/graph_controller.py +424 -0
- scaler/scheduler/controllers/information_controller.py +81 -0
- scaler/scheduler/controllers/mixins.py +194 -0
- scaler/scheduler/controllers/object_controller.py +147 -0
- scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
- scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
- scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
- scaler/scheduler/controllers/scaling_policies/null.py +14 -0
- scaler/scheduler/controllers/scaling_policies/types.py +9 -0
- scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
- scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
- scaler/scheduler/controllers/task_controller.py +376 -0
- scaler/scheduler/controllers/worker_controller.py +169 -0
- scaler/scheduler/object_usage/__init__.py +0 -0
- scaler/scheduler/object_usage/object_tracker.py +131 -0
- scaler/scheduler/scheduler.py +251 -0
- scaler/scheduler/task/__init__.py +0 -0
- scaler/scheduler/task/task_state_machine.py +92 -0
- scaler/scheduler/task/task_state_manager.py +61 -0
- scaler/ui/__init__.py +0 -0
- scaler/ui/constants.py +9 -0
- scaler/ui/live_display.py +147 -0
- scaler/ui/memory_window.py +146 -0
- scaler/ui/setting_page.py +40 -0
- scaler/ui/task_graph.py +832 -0
- scaler/ui/task_log.py +107 -0
- scaler/ui/utility.py +66 -0
- scaler/ui/webui.py +147 -0
- scaler/ui/worker_processors.py +104 -0
- scaler/utility/__init__.py +0 -0
- scaler/utility/debug.py +19 -0
- scaler/utility/event_list.py +63 -0
- scaler/utility/event_loop.py +58 -0
- scaler/utility/exceptions.py +42 -0
- scaler/utility/formatter.py +44 -0
- scaler/utility/graph/__init__.py +0 -0
- scaler/utility/graph/optimization.py +27 -0
- scaler/utility/graph/topological_sorter.py +11 -0
- scaler/utility/graph/topological_sorter_graphblas.py +174 -0
- scaler/utility/identifiers.py +107 -0
- scaler/utility/logging/__init__.py +0 -0
- scaler/utility/logging/decorators.py +25 -0
- scaler/utility/logging/scoped_logger.py +33 -0
- scaler/utility/logging/utility.py +183 -0
- scaler/utility/many_to_many_dict.py +123 -0
- scaler/utility/metadata/__init__.py +0 -0
- scaler/utility/metadata/profile_result.py +31 -0
- scaler/utility/metadata/task_flags.py +30 -0
- scaler/utility/mixins.py +13 -0
- scaler/utility/network_util.py +7 -0
- scaler/utility/one_to_many_dict.py +72 -0
- scaler/utility/queues/__init__.py +0 -0
- scaler/utility/queues/async_indexed_queue.py +37 -0
- scaler/utility/queues/async_priority_queue.py +70 -0
- scaler/utility/queues/async_sorted_priority_queue.py +45 -0
- scaler/utility/queues/indexed_queue.py +114 -0
- scaler/utility/serialization.py +9 -0
- scaler/version.txt +1 -0
- scaler/worker/__init__.py +0 -0
- scaler/worker/agent/__init__.py +0 -0
- scaler/worker/agent/heartbeat_manager.py +107 -0
- scaler/worker/agent/mixins.py +137 -0
- scaler/worker/agent/processor/__init__.py +0 -0
- scaler/worker/agent/processor/object_cache.py +107 -0
- scaler/worker/agent/processor/processor.py +285 -0
- scaler/worker/agent/processor/streaming_buffer.py +28 -0
- scaler/worker/agent/processor_holder.py +147 -0
- scaler/worker/agent/processor_manager.py +369 -0
- scaler/worker/agent/profiling_manager.py +109 -0
- scaler/worker/agent/task_manager.py +150 -0
- scaler/worker/agent/timeout_manager.py +19 -0
- scaler/worker/preload.py +84 -0
- scaler/worker/worker.py +265 -0
- scaler/worker_adapter/__init__.py +0 -0
- scaler/worker_adapter/common.py +26 -0
- scaler/worker_adapter/ecs.py +269 -0
- scaler/worker_adapter/native.py +155 -0
- scaler/worker_adapter/symphony/__init__.py +0 -0
- scaler/worker_adapter/symphony/callback.py +45 -0
- scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
- scaler/worker_adapter/symphony/message.py +24 -0
- scaler/worker_adapter/symphony/task_manager.py +289 -0
- scaler/worker_adapter/symphony/worker.py +204 -0
- scaler/worker_adapter/symphony/worker_adapter.py +139 -0
- src/scaler/io/ymq/_ymq.so +0 -0
- src/scaler/object_storage/object_storage_server.so +0 -0
scaler/ui/task_log.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
from collections import deque
|
|
3
|
+
from threading import Lock
|
|
4
|
+
from typing import Deque, Dict, Optional
|
|
5
|
+
|
|
6
|
+
from nicegui import ui
|
|
7
|
+
|
|
8
|
+
from scaler.protocol.python.common import TaskState
|
|
9
|
+
from scaler.protocol.python.message import StateTask, StateWorker
|
|
10
|
+
from scaler.ui.utility import COMPLETED_TASK_STATUSES, display_capabilities
|
|
11
|
+
from scaler.utility.formatter import format_bytes
|
|
12
|
+
from scaler.utility.metadata.profile_result import ProfileResult
|
|
13
|
+
|
|
14
|
+
# Clickable, ellipsised task-id cell for the task-log grid: the full id is
# shown in a hover tooltip and copied to the clipboard on click.
# `{task}` is substituted three times via str.format (style title, onclick, text).
TASK_ID_HTML_TEMPLATE = (
    "<span "
    "style='display:inline-block;max-width:12rem;overflow:hidden;text-overflow:ellipsis;"
    "white-space:nowrap;cursor:pointer;font:inherit;color:inherit' "
    "title='{task}' onclick=\"navigator.clipboard.writeText('{task}')\">{task}</span>"
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclasses.dataclass
class TaskData:
    """One rendered row of the task log; all fields hold pre-formatted display strings."""

    task: str = dataclasses.field(default="")
    function: str = dataclasses.field(default="")
    duration: str = dataclasses.field(default="")
    peak_mem: str = dataclasses.field(default="")
    status: str = dataclasses.field(default="")
    capabilities: str = dataclasses.field(default="")

    def populate(
        self,
        state: StateTask,
        function_name: str,
        profiling_data: Optional[ProfileResult],
        task_capabilities: Dict[str, int],
    ):
        """Fill the row's display strings from a task state and optional profiling data.

        :param state: terminal task state message this row represents
        :param function_name: resolved function name (may be empty)
        :param profiling_data: profiling result, or None when the task carried no metadata
        :param task_capabilities: capability name -> value mapping; only names are shown
        """
        self.task = f"{state.task_id.hex()}"
        self.function = function_name
        self.status = state.state.name

        if profiling_data:
            duration = profiling_data.duration_s
            mem = profiling_data.memory_peak
            self.duration = f"{duration:.2f}s"
            # a 0 peak is rendered literally rather than through format_bytes
            self.peak_mem = format_bytes(mem) if mem != 0 else "0"
        else:
            # no profiling metadata available (e.g. canceled/failed tasks)
            self.duration = "N/A"
            self.peak_mem = "N/A"

        self.capabilities = display_capabilities(set(task_capabilities.keys()))

    def draw_row(self):
        """Emit this row's six cells into the enclosing 6-column ui.grid."""
        # green for successful tasks, red for any other terminal state
        color = "color: green" if self.status == TaskState.Success.name else "color: red"
        ui.html(TASK_ID_HTML_TEMPLATE.format(task=self.task))
        ui.label(self.function)
        ui.label(self.duration)
        ui.label(self.peak_mem)
        ui.label(self.status).style(color)
        ui.label(self.capabilities)

    @staticmethod
    def draw_titles():
        """Emit the header cells; order must match the cells emitted by draw_row."""
        ui.label("Task ID")
        ui.label("Function")
        ui.label("Duration")
        ui.label("Peak mem")
        ui.label("Status")
        ui.label("Capabilities")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class TaskLogTable:
    """Rolling log of finished tasks (newest first, capped at 100 rows)."""

    def __init__(self):
        # newest rows are appended at the left; maxlen drops the oldest automatically
        self._task_log: Deque[TaskData] = deque(maxlen=100)
        # caches each task's function name from earlier updates, because the
        # terminal update may arrive with an empty function_name
        self._task_id_to_function_name: Dict[str, str] = {}
        # protects _task_log: state updates and UI redraws may run concurrently
        self._lock: Lock = Lock()

    def handle_task_state(self, state_task: StateTask):
        """Record a task state update; only terminal states produce a log row."""
        # remember the function name the first time it appears for this task id
        if state_task.function_name != b"" and state_task.task_id.hex() not in self._task_id_to_function_name:
            self._task_id_to_function_name[state_task.task_id.hex()] = state_task.function_name.decode()

        if state_task.state not in COMPLETED_TASK_STATUSES:
            return

        function_name = state_task.function_name.decode()
        if function_name == "":
            # terminal update lacked the name: use (and discard) the cached one
            function_name = self._task_id_to_function_name.pop(state_task.task_id.hex(), "")

        # Canceled/failed states don't have profiling metadata
        profiling_data = ProfileResult.deserialize(state_task.metadata) if state_task.metadata != b"" else None

        row = TaskData()
        row.populate(state_task, function_name, profiling_data, state_task.capabilities)

        with self._lock:
            self._task_log.appendleft(row)

    def handle_worker_state(self, _: StateWorker):
        # worker updates are irrelevant to the task log
        return

    @ui.refreshable
    def draw_section(self):
        """Render the whole log as a 6-column grid; re-rendered via .refresh()."""
        with self._lock:
            with ui.card().classes("w-full q-mx-auto"), ui.grid(columns=6).classes("q-mx-auto"):
                TaskData.draw_titles()
                for task in self._task_log:
                    task.draw_row()
|
scaler/ui/utility.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import List, Set, Tuple
|
|
3
|
+
|
|
4
|
+
from scaler.protocol.python.common import TaskState
|
|
5
|
+
from scaler.ui.setting_page import Settings
|
|
6
|
+
|
|
7
|
+
# Terminal task states: a task in one of these states is finished and will be
# treated as completed by the UI (e.g. eligible for the task log).
COMPLETED_TASK_STATUSES = {
    TaskState.Success,
    TaskState.Canceled,
    TaskState.CanceledNotFound,
    TaskState.Failed,
    TaskState.FailedWorkerDied,
}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def format_timediff(a: datetime.datetime, b: datetime.datetime) -> float:
    """Return the elapsed seconds from *a* to *b* (negative when *b* precedes *a*)."""
    delta = b - a
    return delta.total_seconds()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def format_worker_name(worker_name: str, cutoff: int = 15) -> str:
    """Truncate *worker_name* to *cutoff* characters, appending '+' when shortened."""
    if len(worker_name) <= cutoff:
        return worker_name
    return worker_name[:cutoff] + "+"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_bounds(now: datetime.datetime, start_time: datetime.datetime, settings: Settings) -> Tuple[int, int]:
    """Compute the (lower, upper) second offsets of the visible stream window.

    Fix: use total_seconds() instead of timedelta.seconds — ``.seconds`` wraps
    at 24 hours, which silently corrupted the bounds once the dashboard had
    been running for more than a day.

    :param now: current wall-clock time
    :param start_time: time the stream started
    :param settings: UI settings; only ``stream_window`` (a timedelta) is read
    :return: (lower_bound_seconds, upper_bound_seconds)
    """
    upper_range = now - start_time
    lower_range = upper_range - settings.stream_window

    window_seconds = int(settings.stream_window.total_seconds())
    # until a full window has elapsed, keep the axis fixed at the window size
    bound_upper_seconds = max(int(upper_range.total_seconds()), window_seconds)
    bound_lower_seconds = 0 if bound_upper_seconds == window_seconds else int(lower_range.total_seconds())

    return bound_lower_seconds, bound_upper_seconds
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def make_taskstream_ticks(lower_bound: int, upper_bound: int) -> List[int]:
    """Return up to 7 evenly spaced tick positions spanning [lower_bound, upper_bound].

    Fix: clamp the step to at least 1 — a span narrower than 6 units used to
    produce a 0 step and make range() raise ValueError.
    """
    distance = max(1, (upper_bound - lower_bound) // 6)
    return list(range(lower_bound, upper_bound + 1, distance))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def make_memory_ticks(max_bytes: int) -> Tuple[List[int], List[str]]:
    """Build logarithmic axis ticks (values and labels) for a memory chart.

    Ticks are 0, 1B, 1KB, ... up to the first power of 1024 that exceeds
    max(1GB, max_bytes); the axis always reaches at least 1GB so an empty
    chart still has a sensible scale.
    """
    units = ["B", "KB", "MB", "GB", "TB"]
    target = max(1024 * 1024 * 1024, max_bytes)

    vals: List[int] = [0]
    texts: List[str] = ["0"]

    value = 1
    for unit in units:
        if value > target:
            break
        vals.append(value)
        texts.append(f"1{unit}")
        value *= 1024

    return vals, texts
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def make_tick_text(window_length: int) -> List[int]:
    """Return up to 7 evenly spaced tick positions from -window_length to 0.

    Fix: clamp the step to at least 1 — a window shorter than 6 units used to
    produce a 0 step and make range() raise ValueError.
    """
    upper = 0
    lower = -1 * window_length
    distance = max(1, (upper - lower) // 6)
    return list(range(lower, upper + 1, distance))
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def display_capabilities(capabilities: Set[str]) -> str:
    """Render a capability set as a stable, human-readable string.

    Idiom fix: the original tested both falsiness and ``len(...) == 0``,
    which are the same condition for a set.
    """
    if not capabilities:
        return "<no capabilities>"

    # sorted() makes the rendering deterministic regardless of set order
    return " & ".join(sorted(capabilities))
|
scaler/ui/webui.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import logging
|
|
3
|
+
import threading
|
|
4
|
+
from functools import partial
|
|
5
|
+
from typing import Optional, Tuple
|
|
6
|
+
|
|
7
|
+
from nicegui import ui
|
|
8
|
+
|
|
9
|
+
from scaler.config.types.zmq import ZMQConfig
|
|
10
|
+
from scaler.io.sync_subscriber import ZMQSyncSubscriber
|
|
11
|
+
from scaler.protocol.python.message import StateBalanceAdvice, StateScheduler, StateTask, StateWorker
|
|
12
|
+
from scaler.protocol.python.mixins import Message
|
|
13
|
+
from scaler.ui.constants import (
|
|
14
|
+
MEMORY_USAGE_UPDATE_INTERVAL,
|
|
15
|
+
TASK_LOG_REFRESH_INTERVAL,
|
|
16
|
+
TASK_STREAM_UPDATE_INTERVAL,
|
|
17
|
+
WORKER_PROCESSORS_REFRESH_INTERVAL,
|
|
18
|
+
)
|
|
19
|
+
from scaler.ui.live_display import SchedulerSection, WorkersSection
|
|
20
|
+
from scaler.ui.memory_window import MemoryChart
|
|
21
|
+
from scaler.ui.setting_page import Settings
|
|
22
|
+
from scaler.ui.task_graph import TaskStream
|
|
23
|
+
from scaler.ui.task_log import TaskLogTable
|
|
24
|
+
from scaler.ui.worker_processors import WorkerProcessors
|
|
25
|
+
from scaler.utility.formatter import format_bytes, format_percentage
|
|
26
|
+
from scaler.utility.logging.utility import setup_logger
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclasses.dataclass
class Sections:
    """Bundle of every UI section so a single incoming status message can be
    fanned out to all of them by the subscriber callback."""

    scheduler_section: SchedulerSection
    workers_section: WorkersSection
    task_stream_section: TaskStream
    memory_usage_section: MemoryChart
    tasklog_section: TaskLogTable
    worker_processors: WorkerProcessors
    settings_section: Settings
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def start_webui(
    address: str,
    host: str,
    port: int,
    logging_paths: Tuple[str, ...],
    logging_config_file: Optional[str],
    logging_level: str,
):
    """Launch the monitoring web UI.

    Subscribes to the scheduler status feed at *address* (ZMQ) and serves a
    NiceGUI app on *host*:*port*. Blocks until the UI thread exits.

    :param address: ZMQ address string of the scheduler's status publisher
    :param host: interface the web server binds to
    :param port: port the web server listens on
    :param logging_paths: log destinations passed to setup_logger
    :param logging_config_file: optional logging config file path
    :param logging_level: log level name
    """

    setup_logger(logging_paths, logging_config_file, logging_level)

    # one instance of every page section; shared with the subscriber callback
    tables = Sections(
        scheduler_section=SchedulerSection(),
        workers_section=WorkersSection(),
        task_stream_section=TaskStream(),
        memory_usage_section=MemoryChart(),
        tasklog_section=TaskLogTable(),
        worker_processors=WorkerProcessors(),
        settings_section=Settings(),
    )

    with ui.tabs().classes("w-full h-full") as tabs:
        live_tab = ui.tab("Live")
        tasklog_tab = ui.tab("Task Log")
        stream_tab = ui.tab("Worker Task Stream")
        worker_processors_tab = ui.tab("Worker Processors")
        settings_tab = ui.tab("Settings")

    with ui.tab_panels(tabs, value=live_tab).classes("w-full"):
        with ui.tab_panel(live_tab):
            tables.scheduler_section.draw_section()
            tables.workers_section.draw_section()  # type: ignore[call-arg]

        with ui.tab_panel(tasklog_tab):
            tables.tasklog_section.draw_section()  # type: ignore[call-arg]
            # periodic refresh of the @ui.refreshable task-log section
            ui.timer(TASK_LOG_REFRESH_INTERVAL, tables.tasklog_section.draw_section.refresh, active=True)

        with ui.tab_panel(stream_tab):
            tables.task_stream_section.setup_task_stream(tables.settings_section)
            ui.timer(TASK_STREAM_UPDATE_INTERVAL, tables.task_stream_section.update_plot, active=True)

            # the memory chart shares the stream tab, updated on its own timer
            tables.memory_usage_section.setup_memory_chart(tables.settings_section)
            ui.timer(MEMORY_USAGE_UPDATE_INTERVAL, tables.memory_usage_section.update_plot, active=True)

        with ui.tab_panel(worker_processors_tab):
            tables.worker_processors.draw_section()  # type: ignore[call-arg]
            ui.timer(WORKER_PROCESSORS_REFRESH_INTERVAL, tables.worker_processors.draw_section.refresh, active=True)

        with ui.tab_panel(settings_tab):
            tables.settings_section.draw_section()

    # feed every scheduler status message into __show_status; timeout -1
    # presumably means "no receive timeout" — see ZMQSyncSubscriber
    subscriber = ZMQSyncSubscriber(
        address=ZMQConfig.from_string(address),
        callback=partial(__show_status, tables=tables),
        topic=b"",
        timeout_seconds=-1,
    )
    subscriber.start()

    # run the NiceGUI server on a dedicated thread and wait for it to finish
    ui_thread = threading.Thread(target=partial(ui.run, host=host, port=port, reload=False), daemon=False)
    ui_thread.start()
    ui_thread.join()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def __show_status(status: Message, tables: Sections):
    """Dispatch one status message to every section interested in it.

    Invoked as the ZMQSyncSubscriber callback for each message received from
    the scheduler's status feed.
    """
    if isinstance(status, StateScheduler):
        __update_scheduler_state(status, tables)
        return

    if isinstance(status, StateWorker):
        # worker connect/disconnect events are rare enough to log at INFO
        logging.info(f"Received StateWorker update for worker {status.worker_id.decode()} with {status.state.name}")
        tables.scheduler_section.handle_worker_state(status)
        tables.workers_section.handle_worker_state(status)
        tables.task_stream_section.handle_worker_state(status)
        tables.memory_usage_section.handle_worker_state(status)
        tables.tasklog_section.handle_worker_state(status)
        tables.worker_processors.handle_worker_state(status)
        tables.settings_section.handle_worker_state(status)
        return

    if isinstance(status, StateTask):
        # task updates are high-volume, so only log them at DEBUG
        logging.debug(f"Received StateTask update for task {status.task_id.hex()} with {status.state.name}")
        tables.scheduler_section.handle_task_state(status)
        tables.workers_section.handle_task_state(status)
        tables.task_stream_section.handle_task_state(status)
        tables.memory_usage_section.handle_task_state(status)
        tables.tasklog_section.handle_task_state(status)
        tables.worker_processors.handle_task_state(status)
        tables.settings_section.handle_task_state(status)
        return

    if isinstance(status, StateBalanceAdvice):
        # balance advice is logged but not visualized by any section
        logging.debug(f"Received StateBalanceAdvice for {status.worker_id.decode()} with {len(status.task_ids)} tasks")
        return

    logging.info(f"Unhandled message received: {type(status)}")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def __update_scheduler_state(data: StateScheduler, tables: Sections):
    """Push a periodic StateScheduler snapshot into the live-display tables."""
    tables.scheduler_section.cpu = format_percentage(data.scheduler.cpu)
    tables.scheduler_section.rss = format_bytes(data.scheduler.rss)
    tables.scheduler_section.rss_free = format_bytes(data.rss_free)

    for worker_data in data.worker_manager.workers:
        worker_name = worker_data.worker_id.decode()
        # NOTE(review): this indexing assumes workers_section.workers creates
        # entries for unseen worker names (e.g. a defaultdict) — confirm in
        # WorkersSection, otherwise an unknown worker raises KeyError here.
        tables.workers_section.workers[worker_name].populate(worker_data)

    tables.worker_processors.update_data(data.worker_manager.workers)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
from threading import Lock
|
|
3
|
+
from typing import Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
from nicegui import ui
|
|
6
|
+
from nicegui.element import Element
|
|
7
|
+
|
|
8
|
+
from scaler.protocol.python.common import WorkerState
|
|
9
|
+
from scaler.protocol.python.message import StateTask, StateWorker
|
|
10
|
+
from scaler.protocol.python.status import ProcessorStatus, WorkerStatus
|
|
11
|
+
from scaler.ui.utility import format_worker_name
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclasses.dataclass
class WorkerProcessors:
    """Per-worker processor tables for the "Worker Processors" tab.

    Fix: the lock is now created per instance via ``default_factory``. The
    original ``_lock: Lock = Lock()`` evaluated the lock once at class
    definition time, so every instance silently shared a single class-level
    lock.
    """

    workers: Dict[str, "WorkerProcessorTable"] = dataclasses.field(default_factory=dict)
    _lock: Lock = dataclasses.field(default_factory=Lock)

    @ui.refreshable
    def draw_section(self):
        """Render one table per known worker; re-rendered via .refresh()."""
        with self._lock:
            for processor_table in self.workers.values():
                processor_table.draw_table()

    def update_data(self, data: List[WorkerStatus]):
        """Merge the latest per-worker processor statuses into the tables."""
        with self._lock:
            for worker in data:
                worker_name = worker.worker_id.decode()
                processor_table = self.workers.get(worker_name)

                if processor_table is None:
                    # first sighting of this worker: start with rss_max = 0
                    processor_table = WorkerProcessorTable(worker_name, 0, worker.rss_free, worker.processor_statuses)
                    self.workers[worker_name] = processor_table
                elif processor_table.processor_statuses != worker.processor_statuses:
                    processor_table.processor_statuses = worker.processor_statuses

    def remove_worker(self, dead_worker: str):
        """Drop the table of a disconnected worker (no-op if unknown)."""
        with self._lock:
            self.workers.pop(dead_worker, None)

    def handle_task_state(self, _: StateTask):
        # task updates are irrelevant to this view
        return

    def handle_worker_state(self, state_worker: StateWorker):
        """Remove a worker's table when it reports Disconnected."""
        worker_id = state_worker.worker_id.decode()
        state = state_worker.state

        if state == WorkerState.Disconnected:
            self.remove_worker(worker_id)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclasses.dataclass
class WorkerProcessorTable:
    """UI card listing each processor of one worker as a row of gauges/flags."""

    worker_name: str
    rss_max: int  # running maximum RSS observed across this worker's processors
    rss_free: int
    processor_statuses: List[ProcessorStatus]

    # root element of the rendered card, kept so delete_row() can remove it
    handler: Optional[Element] = dataclasses.field(default=None)

    def draw_table(self):
        """Render the card: worker title plus a 7-column grid of processor rows."""
        formatted_worker_name = format_worker_name(self.worker_name)
        with ui.card().classes("w-full") as handler:
            self.handler = handler

            ui.markdown(f"Worker **{formatted_worker_name}**").classes("text-xl")

            with ui.grid(columns=7).classes("w-full"):
                self.draw_titles()
                for processor in sorted(self.processor_statuses, key=lambda x: x.pid):
                    # track the highest RSS seen so far for this worker
                    if processor.resource.rss > self.rss_max:
                        self.rss_max = processor.resource.rss

                    self.draw_row(processor, self.rss_free, self.rss_max)

    @staticmethod
    def draw_titles():
        """Header cells; order must match the cells emitted by draw_row."""
        ui.label("Processor PID")
        ui.label("CPU %")
        ui.label("RSS (in MB)")
        ui.label("Max RSS (in MB)")
        ui.label("Initialized")
        ui.label("Has Task")
        ui.label("Suspended")

    @staticmethod
    def draw_row(processor_status: ProcessorStatus, rss_free: int, rss_max: int):
        """Render one processor's cells; byte counts are converted to MB."""
        # NOTE(review): dividing by 10 for a 0-100 knob suggests resource.cpu
        # is reported in tenths of a percent — confirm against ProcessorStatus.
        cpu = processor_status.resource.cpu / 10
        rss = int(processor_status.resource.rss / 1e6)
        rss_max = int(rss_max / 1e6)
        rss_free = int(rss_free / 1e6)

        ui.label(str(processor_status.pid))
        ui.knob(value=cpu, track_color="grey-2", show_value=True, min=0, max=100)
        # both RSS knobs share the same ceiling: used + free memory of the worker
        ui.knob(value=rss, track_color="grey-2", show_value=True, min=0, max=rss + rss_free)
        ui.knob(value=rss_max, track_color="grey-2", show_value=True, min=0, max=rss + rss_free)
        ui.checkbox().bind_value_from(processor_status, "initialized")
        ui.checkbox().bind_value_from(processor_status, "has_task")
        ui.checkbox().bind_value_from(processor_status, "suspended")

    def delete_row(self):
        """Remove this card from the page; draw_table() must have run first."""
        assert self.handler is not None
        self.handler.clear()
        self.handler.delete()
|
|
File without changes
|
scaler/utility/debug.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import pdb
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Callable
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def pdb_wrapped(func: Callable):
    """Decorator for CLI entry points: exit with the wrapped function's return
    value as the process exit code, and drop into a pdb post-mortem session if
    it raises.

    SystemExit (raised by sys.exit on success) is not an Exception subclass,
    so the normal exit path never triggers the debugger.
    """

    @functools.wraps(func)
    def pdb_wrapper(*args, **kwargs):
        try:
            exit_code = func(*args, **kwargs)
            sys.exit(exit_code)

        except Exception:
            # inspect the failing frame interactively, then re-raise so the
            # process still terminates with the original traceback
            pdb.post_mortem(sys.exc_info()[2])
            raise

    return pdb_wrapper
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
from typing import Callable
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class EventList(collections.UserList):
    """A list that invokes registered callbacks whenever its contents change."""

    def __init__(self, initlist=None):
        super().__init__(initlist=initlist)
        # callbacks registered via add_update_callback; each receives this list
        self._callbacks = []

    def add_update_callback(self, callback: Callable[["EventList"], None]):
        """Register *callback*; it is invoked with this list after every mutation."""
        self._callbacks.append(callback)

    def __setitem__(self, i, item):
        super().__setitem__(i, item)
        self._list_updated()

    def __delitem__(self, i):
        super().__delitem__(i)
        self._list_updated()

    def __add__(self, other):
        # Fix: the original discarded UserList.__add__'s result and returned
        # None (and fired callbacks even though self was not mutated).
        # Concatenation creates a new list and must not notify.
        return super().__add__(other)

    def __iadd__(self, other):
        super().__iadd__(other)
        self._list_updated()
        return self

    def append(self, item):
        super().append(item)
        self._list_updated()

    def insert(self, i, item):
        super().insert(i, item)
        self._list_updated()

    def pop(self, i: int = -1):
        v = super().pop(i)
        self._list_updated()
        return v

    def remove(self, item):
        super().remove(item)
        self._list_updated()

    def clear(self) -> None:
        super().clear()
        self._list_updated()

    def sort(self, /, *args, **kwargs):
        super().sort(*args, **kwargs)
        self._list_updated()

    def extend(self, other) -> None:
        super().extend(other)
        self._list_updated()

    def _list_updated(self):
        # callbacks receive the live list, not a snapshot
        for callback in self._callbacks:
            callback(self)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import enum
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Awaitable, Callable
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class EventLoopType(enum.Enum):
    """Supported asyncio event-loop implementations."""

    builtin = enum.auto()
    uvloop = enum.auto()

    @staticmethod
    def allowed_types():
        """Return the set of valid event-loop type names."""
        names = set()
        for member in EventLoopType:
            names.add(member.name)
        return names


def register_event_loop(event_loop_type: str):
    """Validate *event_loop_type* and install uvloop when it is requested.

    Raises TypeError for an unknown type name, and ImportError when uvloop is
    requested but not installed.
    """
    if event_loop_type not in EventLoopType.allowed_types():
        raise TypeError(f"allowed event loop types are: {EventLoopType.allowed_types()}")

    if EventLoopType[event_loop_type] == EventLoopType.uvloop:
        try:
            import uvloop  # noqa
        except ImportError:
            raise ImportError("please use pip install uvloop if try to use uvloop as event loop")

        uvloop.install()

    assert event_loop_type in EventLoopType.allowed_types()

    logging.info(f"use event loop: {event_loop_type}")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def create_async_loop_routine(routine: Callable[[], Awaitable], seconds: int):
    """create async loop routine,

    - if seconds is negative, means disable
    - 0 means looping without any wait, as fast as possible
    - positive number means execute routine every positive seconds, if passing 1 means run once every 1 seconds"""

    async def loop():
        if seconds < 0:
            # NOTE: the log lines below read routine.__self__, so *routine* is
            # assumed to be a bound method of some object; a plain function
            # would raise AttributeError here.
            logging.info(f"{routine.__self__.__class__.__name__}: disabled")  # type: ignore[attr-defined]
            return

        logging.info(f"{routine.__self__.__class__.__name__}: started")  # type: ignore[attr-defined]
        try:
            while True:
                await routine()
                await asyncio.sleep(seconds)
        except asyncio.CancelledError:
            # normal shutdown path: the owning task was cancelled
            pass
        except KeyboardInterrupt:
            pass

        logging.info(f"{routine.__self__.__class__.__name__}: exited")  # type: ignore[attr-defined]

    # returns the coroutine object; the caller is expected to schedule it
    return loop()
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
class TaskNotFoundError(Exception):
    """Error indicating a referenced task id is not known."""

    pass


class WorkerDiedError(Exception):
    """Error indicating the worker handling some work died."""

    pass


class NoWorkerError(Exception):
    """Error indicating no worker is available."""

    pass


class DisconnectedError(Exception):
    """Error indicating a required connection has been lost."""

    pass


class ProcessorDiedError(Exception):
    """Error indicating a processor process terminated unexpectedly."""

    pass


class DeserializeObjectError(Exception):
    """Error indicating an object could not be deserialized."""

    pass


class MissingObjects(Exception):
    """Error indicating required objects are absent."""

    pass


class ClientCancelledException(Exception):
    """Error raised in the context of a client-side cancellation."""

    pass


class ClientShutdownException(Exception):
    """Error raised in the context of a client shutdown."""

    pass


class ClientQuitException(Exception):
    """Error raised in the context of a client quitting."""

    pass


class ObjectStorageException(Exception):
    """Generic error from the object storage layer."""

    pass
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
STORAGE_SIZE_MODULUS = 1024.0
TIME_MODULUS = 1000


def format_bytes(number) -> str:
    """Format a byte count as a short human-readable string (e.g. "512K", "1.5G").

    B and K values are rendered as integers; larger units keep one decimal.
    """
    for unit in ["B", "K", "M", "G", "T"]:
        if number >= STORAGE_SIZE_MODULUS:
            number /= STORAGE_SIZE_MODULUS
            continue

        if unit in {"B", "K"}:
            return f"{int(number)}{unit}"

        return f"{number:.1f}{unit}"

    # Fix: values of 1024T and above used to fall off the loop and raise
    # ValueError("This should not happen"); render them as petabytes instead.
    return f"{number:.1f}P"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def format_integer(number):
    """Render *number* with thousands separators (e.g. 1234567 -> "1,234,567")."""
    return format(number, ",")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def format_percentage(number: int):
    """Convert a per-mille integer into a percent string, e.g. 500 -> "50.0%"."""
    ratio = number / 1000
    return f"{ratio:.1%}"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def format_microseconds(number: int):
    """Format a duration given in microseconds as a short string.

    Sub-millisecond inputs are rendered in ms with one decimal; otherwise the
    largest fitting unit among ms/s is used, and durations beyond 1000 seconds
    are marked with a trailing "+".

    Fix: the original also divided at the final "s" unit, falling off the loop
    and returning None for durations >= 1000 seconds; the "+" marker was
    unreachable dead code.
    """
    for unit in ["us", "ms", "s"]:
        # never scale past the last unit — seconds are the display ceiling
        if unit != "s" and number >= TIME_MODULUS:
            number = int(number / TIME_MODULUS)
            continue

        if unit == "us":
            return f"{number/TIME_MODULUS:.1f}ms"

        too_big_sign = "+" if unit == "s" and number > TIME_MODULUS else ""
        return f"{int(number)}{too_big_sign}{unit}"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def format_seconds(number: int):
    """Render a seconds count, capping anything over a minute as "60+s"."""
    return "60+s" if number > 60 else f"{number}s"
|
|
File without changes
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from collections import deque
|
|
2
|
+
from typing import Any, Callable, Dict, List, Tuple, Union
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def cull_graph(
    graph: Dict[str, Tuple[Union[Callable, Any], ...]], keys: List[str]
) -> Dict[str, Tuple[Union[Callable, Any], ...]]:
    """Return the sub-graph containing *keys* and everything they transitively
    depend on.

    A node is a task when its value is a non-empty tuple whose first element
    is callable; the remaining tuple elements name its dependencies. Non-task
    nodes (plain values) are kept but not traversed.

    Idiom cleanup: seed the visited set in one step (set(keys)) instead of a
    manual loop, and iterate dependencies directly — the visited check already
    deduplicates, so the intermediate set(task[1:]) was unnecessary.
    """
    queue = deque(keys)
    visited = set(keys)

    while queue:
        key = queue.popleft()

        task = graph[key]
        if not (isinstance(task, tuple) and task and callable(task[0])):
            continue

        for predecessor_key in task[1:]:
            if predecessor_key in visited:
                continue
            visited.add(predecessor_key)
            queue.append(predecessor_key)

    return {key: graph[key] for key in visited}
|