opengris-scaler 1.12.37__cp38-cp38-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. opengris_scaler-1.12.37.dist-info/METADATA +730 -0
  2. opengris_scaler-1.12.37.dist-info/RECORD +196 -0
  3. opengris_scaler-1.12.37.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.37.dist-info/entry_points.txt +10 -0
  5. opengris_scaler-1.12.37.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.37.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.37.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/__init__.py +14 -0
  13. scaler/about.py +5 -0
  14. scaler/client/__init__.py +0 -0
  15. scaler/client/agent/__init__.py +0 -0
  16. scaler/client/agent/client_agent.py +218 -0
  17. scaler/client/agent/disconnect_manager.py +27 -0
  18. scaler/client/agent/future_manager.py +112 -0
  19. scaler/client/agent/heartbeat_manager.py +74 -0
  20. scaler/client/agent/mixins.py +89 -0
  21. scaler/client/agent/object_manager.py +98 -0
  22. scaler/client/agent/task_manager.py +64 -0
  23. scaler/client/client.py +672 -0
  24. scaler/client/future.py +252 -0
  25. scaler/client/object_buffer.py +129 -0
  26. scaler/client/object_reference.py +25 -0
  27. scaler/client/serializer/__init__.py +0 -0
  28. scaler/client/serializer/default.py +16 -0
  29. scaler/client/serializer/mixins.py +38 -0
  30. scaler/cluster/__init__.py +0 -0
  31. scaler/cluster/cluster.py +95 -0
  32. scaler/cluster/combo.py +157 -0
  33. scaler/cluster/object_storage_server.py +45 -0
  34. scaler/cluster/scheduler.py +86 -0
  35. scaler/config/__init__.py +0 -0
  36. scaler/config/common/__init__.py +0 -0
  37. scaler/config/common/logging.py +41 -0
  38. scaler/config/common/web.py +18 -0
  39. scaler/config/common/worker.py +65 -0
  40. scaler/config/common/worker_adapter.py +28 -0
  41. scaler/config/config_class.py +317 -0
  42. scaler/config/defaults.py +94 -0
  43. scaler/config/mixins.py +20 -0
  44. scaler/config/section/__init__.py +0 -0
  45. scaler/config/section/cluster.py +66 -0
  46. scaler/config/section/ecs_worker_adapter.py +78 -0
  47. scaler/config/section/native_worker_adapter.py +30 -0
  48. scaler/config/section/object_storage_server.py +13 -0
  49. scaler/config/section/scheduler.py +126 -0
  50. scaler/config/section/symphony_worker_adapter.py +35 -0
  51. scaler/config/section/top.py +16 -0
  52. scaler/config/section/webui.py +16 -0
  53. scaler/config/types/__init__.py +0 -0
  54. scaler/config/types/network_backend.py +12 -0
  55. scaler/config/types/object_storage_server.py +45 -0
  56. scaler/config/types/worker.py +67 -0
  57. scaler/config/types/zmq.py +83 -0
  58. scaler/entry_points/__init__.py +0 -0
  59. scaler/entry_points/cluster.py +10 -0
  60. scaler/entry_points/object_storage_server.py +26 -0
  61. scaler/entry_points/scheduler.py +51 -0
  62. scaler/entry_points/top.py +272 -0
  63. scaler/entry_points/webui.py +6 -0
  64. scaler/entry_points/worker_adapter_ecs.py +22 -0
  65. scaler/entry_points/worker_adapter_native.py +31 -0
  66. scaler/entry_points/worker_adapter_symphony.py +26 -0
  67. scaler/io/__init__.py +0 -0
  68. scaler/io/async_binder.py +89 -0
  69. scaler/io/async_connector.py +95 -0
  70. scaler/io/async_object_storage_connector.py +225 -0
  71. scaler/io/mixins.py +154 -0
  72. scaler/io/sync_connector.py +68 -0
  73. scaler/io/sync_object_storage_connector.py +249 -0
  74. scaler/io/sync_subscriber.py +83 -0
  75. scaler/io/utility.py +80 -0
  76. scaler/io/ymq/__init__.py +0 -0
  77. scaler/io/ymq/_ymq.pyi +95 -0
  78. scaler/io/ymq/_ymq.so +0 -0
  79. scaler/io/ymq/ymq.py +138 -0
  80. scaler/io/ymq_async_object_storage_connector.py +184 -0
  81. scaler/io/ymq_sync_object_storage_connector.py +184 -0
  82. scaler/object_storage/__init__.py +0 -0
  83. scaler/object_storage/object_storage_server.so +0 -0
  84. scaler/protocol/__init__.py +0 -0
  85. scaler/protocol/capnp/__init__.py +0 -0
  86. scaler/protocol/capnp/_python.py +6 -0
  87. scaler/protocol/capnp/common.capnp +68 -0
  88. scaler/protocol/capnp/message.capnp +218 -0
  89. scaler/protocol/capnp/object_storage.capnp +57 -0
  90. scaler/protocol/capnp/status.capnp +73 -0
  91. scaler/protocol/introduction.md +105 -0
  92. scaler/protocol/python/__init__.py +0 -0
  93. scaler/protocol/python/common.py +140 -0
  94. scaler/protocol/python/message.py +751 -0
  95. scaler/protocol/python/mixins.py +13 -0
  96. scaler/protocol/python/object_storage.py +118 -0
  97. scaler/protocol/python/status.py +279 -0
  98. scaler/protocol/worker.md +228 -0
  99. scaler/scheduler/__init__.py +0 -0
  100. scaler/scheduler/allocate_policy/__init__.py +0 -0
  101. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  102. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  103. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  104. scaler/scheduler/allocate_policy/mixins.py +55 -0
  105. scaler/scheduler/controllers/__init__.py +0 -0
  106. scaler/scheduler/controllers/balance_controller.py +65 -0
  107. scaler/scheduler/controllers/client_controller.py +131 -0
  108. scaler/scheduler/controllers/config_controller.py +31 -0
  109. scaler/scheduler/controllers/graph_controller.py +424 -0
  110. scaler/scheduler/controllers/information_controller.py +81 -0
  111. scaler/scheduler/controllers/mixins.py +194 -0
  112. scaler/scheduler/controllers/object_controller.py +147 -0
  113. scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
  114. scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
  115. scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
  116. scaler/scheduler/controllers/scaling_policies/null.py +14 -0
  117. scaler/scheduler/controllers/scaling_policies/types.py +9 -0
  118. scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
  119. scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
  120. scaler/scheduler/controllers/task_controller.py +376 -0
  121. scaler/scheduler/controllers/worker_controller.py +169 -0
  122. scaler/scheduler/object_usage/__init__.py +0 -0
  123. scaler/scheduler/object_usage/object_tracker.py +131 -0
  124. scaler/scheduler/scheduler.py +251 -0
  125. scaler/scheduler/task/__init__.py +0 -0
  126. scaler/scheduler/task/task_state_machine.py +92 -0
  127. scaler/scheduler/task/task_state_manager.py +61 -0
  128. scaler/ui/__init__.py +0 -0
  129. scaler/ui/common/__init__.py +0 -0
  130. scaler/ui/common/constants.py +9 -0
  131. scaler/ui/common/live_display.py +147 -0
  132. scaler/ui/common/memory_window.py +146 -0
  133. scaler/ui/common/setting_page.py +40 -0
  134. scaler/ui/common/task_graph.py +840 -0
  135. scaler/ui/common/task_log.py +111 -0
  136. scaler/ui/common/utility.py +66 -0
  137. scaler/ui/common/webui.py +80 -0
  138. scaler/ui/common/worker_processors.py +104 -0
  139. scaler/ui/v1.py +76 -0
  140. scaler/ui/v2.py +102 -0
  141. scaler/ui/webui.py +21 -0
  142. scaler/utility/__init__.py +0 -0
  143. scaler/utility/debug.py +19 -0
  144. scaler/utility/event_list.py +63 -0
  145. scaler/utility/event_loop.py +58 -0
  146. scaler/utility/exceptions.py +42 -0
  147. scaler/utility/formatter.py +44 -0
  148. scaler/utility/graph/__init__.py +0 -0
  149. scaler/utility/graph/optimization.py +27 -0
  150. scaler/utility/graph/topological_sorter.py +11 -0
  151. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  152. scaler/utility/identifiers.py +107 -0
  153. scaler/utility/logging/__init__.py +0 -0
  154. scaler/utility/logging/decorators.py +25 -0
  155. scaler/utility/logging/scoped_logger.py +33 -0
  156. scaler/utility/logging/utility.py +183 -0
  157. scaler/utility/many_to_many_dict.py +123 -0
  158. scaler/utility/metadata/__init__.py +0 -0
  159. scaler/utility/metadata/profile_result.py +31 -0
  160. scaler/utility/metadata/task_flags.py +30 -0
  161. scaler/utility/mixins.py +13 -0
  162. scaler/utility/network_util.py +7 -0
  163. scaler/utility/one_to_many_dict.py +72 -0
  164. scaler/utility/queues/__init__.py +0 -0
  165. scaler/utility/queues/async_indexed_queue.py +37 -0
  166. scaler/utility/queues/async_priority_queue.py +70 -0
  167. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  168. scaler/utility/queues/indexed_queue.py +114 -0
  169. scaler/utility/serialization.py +9 -0
  170. scaler/version.txt +1 -0
  171. scaler/worker/__init__.py +0 -0
  172. scaler/worker/agent/__init__.py +0 -0
  173. scaler/worker/agent/heartbeat_manager.py +110 -0
  174. scaler/worker/agent/mixins.py +137 -0
  175. scaler/worker/agent/processor/__init__.py +0 -0
  176. scaler/worker/agent/processor/object_cache.py +107 -0
  177. scaler/worker/agent/processor/processor.py +285 -0
  178. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  179. scaler/worker/agent/processor_holder.py +147 -0
  180. scaler/worker/agent/processor_manager.py +369 -0
  181. scaler/worker/agent/profiling_manager.py +109 -0
  182. scaler/worker/agent/task_manager.py +150 -0
  183. scaler/worker/agent/timeout_manager.py +19 -0
  184. scaler/worker/preload.py +84 -0
  185. scaler/worker/worker.py +265 -0
  186. scaler/worker_adapter/__init__.py +0 -0
  187. scaler/worker_adapter/common.py +26 -0
  188. scaler/worker_adapter/ecs.py +241 -0
  189. scaler/worker_adapter/native.py +138 -0
  190. scaler/worker_adapter/symphony/__init__.py +0 -0
  191. scaler/worker_adapter/symphony/callback.py +45 -0
  192. scaler/worker_adapter/symphony/heartbeat_manager.py +82 -0
  193. scaler/worker_adapter/symphony/message.py +24 -0
  194. scaler/worker_adapter/symphony/task_manager.py +289 -0
  195. scaler/worker_adapter/symphony/worker.py +204 -0
  196. scaler/worker_adapter/symphony/worker_adapter.py +123 -0
@@ -0,0 +1,111 @@
1
+ import dataclasses
2
+ from collections import deque
3
+ from threading import Lock
4
+ from typing import Deque, Dict, Optional
5
+
6
+ from nicegui import ui
7
+
8
+ from scaler.protocol.python.common import TaskState
9
+ from scaler.protocol.python.message import StateTask, StateWorker
10
+ from scaler.ui.common.utility import COMPLETED_TASK_STATUSES, display_capabilities
11
+ from scaler.ui.util import NICEGUI_MAJOR_VERSION
12
+ from scaler.utility.formatter import format_bytes
13
+ from scaler.utility.metadata.profile_result import ProfileResult
14
+
15
+ TASK_ID_HTML_TEMPLATE = (
16
+ "<span "
17
+ "style='display:inline-block;max-width:12rem;overflow:hidden;text-overflow:ellipsis;"
18
+ "white-space:nowrap;cursor:pointer;font:inherit;color:inherit' "
19
+ "title='{task}' onclick=\"navigator.clipboard.writeText('{task}')\">{task}</span>"
20
+ )
21
+
22
+
23
@dataclasses.dataclass
class TaskData:
    """Pre-formatted display strings for a single row of the task log grid."""

    task: str = ""
    function: str = ""
    duration: str = ""
    peak_mem: str = ""
    status: str = ""
    capabilities: str = ""

    def populate(
        self,
        state: StateTask,
        function_name: str,
        profiling_data: Optional[ProfileResult],
        task_capabilities: Dict[str, int],
    ):
        """Fill every column of the row from a task state message.

        :param state: message supplying the task id and the state name
        :param function_name: already-decoded name of the task's function
        :param profiling_data: duration/peak-memory figures, or a falsy value when unavailable
        :param task_capabilities: mapping whose keys are the capability names to display
        """
        self.task = state.task_id.hex()
        self.function = function_name
        self.status = state.state.name

        if not profiling_data:
            self.duration = "N/A"
            self.peak_mem = "N/A"
        else:
            elapsed = profiling_data.duration_s
            peak = profiling_data.memory_peak
            self.duration = f"{elapsed:.2f}s"
            # a zero peak is shown as a bare "0" instead of going through format_bytes
            self.peak_mem = "0" if peak == 0 else format_bytes(peak)

        self.capabilities = display_capabilities(set(task_capabilities))

    def draw_row(self):
        """Emit the six UI elements of this row, in the same column order as draw_titles()."""
        status_style = "color: green" if self.status == TaskState.Success.name else "color: red"
        task_html = TASK_ID_HTML_TEMPLATE.format(task=self.task)

        # only NiceGUI >= 3 is given the `sanitize` keyword
        if NICEGUI_MAJOR_VERSION >= 3:
            ui.html(task_html, sanitize=False)  # type: ignore[call-arg]
        else:
            ui.html(task_html)

        for text in (self.function, self.duration, self.peak_mem):
            ui.label(text)
        ui.label(self.status).style(status_style)
        ui.label(self.capabilities)

    @staticmethod
    def draw_titles():
        """Emit one header label per column of the task log grid."""
        for title in ("Task ID", "Function", "Duration", "Peak mem", "Status", "Capabilities"):
            ui.label(title)
74
+
75
+
76
class TaskLogTable:
    """Rolling log of the most recently completed tasks, drawn as a six-column grid.

    At most 100 rows are retained; newer rows are inserted at the front.
    """

    def __init__(self):
        self._task_log: Deque[TaskData] = deque(maxlen=100)
        # task id (hex) -> function name, remembered from earlier messages because a
        # completion message may arrive with an empty function name
        self._task_id_to_function_name: Dict[str, str] = {}
        self._lock: Lock = Lock()

    def handle_task_state(self, state_task: StateTask):
        """Record a task state update; only completed states produce a log row.

        For a non-completed state the function name (when present) is remembered under
        the task id. For a completed state the remembered name is always discarded, so
        the lookup table cannot grow without bound, and is used as a fallback when the
        message itself carries no function name.
        """
        task_hex = state_task.task_id.hex()
        function_name = state_task.function_name.decode()

        if state_task.state not in COMPLETED_TASK_STATUSES:
            if function_name and task_hex not in self._task_id_to_function_name:
                self._task_id_to_function_name[task_hex] = function_name
            return

        # Always pop: previously the entry was only removed when the completion message
        # had an empty function name, leaking one entry per task otherwise.
        remembered_name = self._task_id_to_function_name.pop(task_hex, "")
        if function_name == "":
            function_name = remembered_name

        # Canceled/failed states don't have profiling metadata
        profiling_data = ProfileResult.deserialize(state_task.metadata) if state_task.metadata != b"" else None

        row = TaskData()
        row.populate(state_task, function_name, profiling_data, state_task.capabilities)

        with self._lock:
            self._task_log.appendleft(row)

    def handle_worker_state(self, _: StateWorker):
        """Worker state updates do not affect the task log."""
        return

    @ui.refreshable
    def draw_section(self):
        """Draw the header row followed by every retained task row, newest first."""
        # hold the lock while iterating so handle_task_state cannot mutate the deque mid-draw
        with self._lock:
            with ui.card().classes("w-full q-mx-auto"), ui.grid(columns=6).classes("q-mx-auto"):
                TaskData.draw_titles()
                for task in self._task_log:
                    task.draw_row()
@@ -0,0 +1,66 @@
1
+ import datetime
2
+ from typing import List, Set, Tuple
3
+
4
+ from scaler.protocol.python.common import TaskState
5
+ from scaler.ui.common.setting_page import Settings
6
+
7
+ COMPLETED_TASK_STATUSES = {
8
+ TaskState.Success,
9
+ TaskState.Canceled,
10
+ TaskState.CanceledNotFound,
11
+ TaskState.Failed,
12
+ TaskState.FailedWorkerDied,
13
+ }
14
+
15
+
16
def format_timediff(a: datetime.datetime, b: datetime.datetime) -> float:
    """Return the signed number of seconds from *a* to *b* (negative when *b* precedes *a*)."""
    elapsed = b - a
    return elapsed.total_seconds()
18
+
19
+
20
def format_worker_name(worker_name: str, cutoff: int = 15) -> str:
    """Shorten *worker_name* to *cutoff* characters, appending "+" when anything was cut."""
    if len(worker_name) <= cutoff:
        return worker_name

    return worker_name[:cutoff] + "+"
22
+
23
+
24
def get_bounds(now: datetime.datetime, start_time: datetime.datetime, settings: "Settings") -> Tuple[int, int]:
    """Return the ``(lower, upper)`` bounds, in whole seconds since *start_time*, of the visible window.

    While less than one ``settings.stream_window`` has elapsed the bounds are
    ``(0, window)``; afterwards the window slides so that it ends at the elapsed time.

    :param now: current time
    :param start_time: time the stream started
    :param settings: object exposing ``stream_window`` as a ``datetime.timedelta``
    """
    elapsed = now - start_time
    window = settings.stream_window

    # total_seconds() rather than .seconds: the latter is only the sub-day component
    # of the timedelta and wraps back to 0 every 24 hours.
    elapsed_seconds = int(elapsed.total_seconds())
    window_seconds = int(window.total_seconds())

    bound_upper_seconds = max(elapsed_seconds, window_seconds)
    if bound_upper_seconds == window_seconds:
        bound_lower_seconds = 0
    else:
        bound_lower_seconds = int((elapsed - window).total_seconds())

    return bound_lower_seconds, bound_upper_seconds
32
+
33
+
34
def make_taskstream_ticks(lower_bound: int, upper_bound: int) -> List[int]:
    """Return tick positions from *lower_bound* up to *upper_bound*, spaced a sixth of the span apart.

    When the span is shorter than 6 the integer division yields 0, which ``range``
    rejects; a step of 1 is used instead so narrow windows still get ticks.
    """
    distance = (upper_bound - lower_bound) // 6 or 1
    return list(range(lower_bound, upper_bound + 1, distance))
37
+
38
+
39
def make_memory_ticks(max_bytes: int) -> Tuple[List[int], List[str]]:
    """Return tick values and matching labels at 0 and at each power of 1024 up to the chart ceiling.

    The ceiling is *max_bytes*, but never below 1GB, so an empty chart still shows
    ticks up to "1GB". Labels stop at "1TB", the largest unit available.
    """
    ceiling = max(1024 * 1024 * 1024, max_bytes)

    tick_values: List[int] = [0]
    tick_labels: List[str] = ["0"]

    magnitude = 1
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if magnitude > ceiling:
            break

        tick_values.append(magnitude)
        tick_labels.append(f"1{unit}")
        magnitude *= 1024

    return tick_values, tick_labels
53
+
54
+
55
def make_tick_text(window_length: int) -> List[int]:
    """Return tick labels from ``-window_length`` up to 0, spaced a sixth of the window apart.

    A window shorter than 6 makes the integer division yield 0, which ``range``
    rejects; a step of 1 is used in that case.
    """
    upper = 0
    lower = -1 * window_length
    distance = (upper - lower) // 6 or 1
    return list(range(lower, upper + 1, distance))
60
+
61
+
62
def display_capabilities(capabilities: Set[str]) -> str:
    """Join the capability names alphabetically with " & ", or return a placeholder when there are none."""
    if capabilities:
        return " & ".join(sorted(capabilities))

    return "<no capabilities>"
@@ -0,0 +1,80 @@
1
+ import dataclasses
2
+ import logging
3
+
4
+ from scaler.protocol.python.message import StateBalanceAdvice, StateScheduler, StateTask, StateWorker
5
+ from scaler.protocol.python.mixins import Message
6
+ from scaler.ui.common.live_display import SchedulerSection, WorkersSection
7
+ from scaler.ui.common.memory_window import MemoryChart
8
+ from scaler.ui.common.setting_page import Settings
9
+ from scaler.ui.common.task_graph import TaskStream
10
+ from scaler.ui.common.task_log import TaskLogTable
11
+ from scaler.ui.common.worker_processors import WorkerProcessors
12
+ from scaler.utility.formatter import format_bytes, format_percentage
13
+
14
+
15
@dataclasses.dataclass
class Sections:
    """Every web UI section that receives scheduler monitor messages, grouped in one object.

    process_scheduler_message() forwards worker and task state updates to each
    field, in the order the fields are declared here.
    """

    # scheduler resource summary
    scheduler_section: SchedulerSection
    # per-worker table
    workers_section: WorkersSection
    # worker task stream plot
    task_stream_section: TaskStream
    # memory usage plot
    memory_usage_section: MemoryChart
    # table of recently completed tasks
    tasklog_section: TaskLogTable
    # per-worker processor tables
    worker_processors: WorkerProcessors
    # settings page
    settings_section: Settings
24
+
25
+
26
def process_scheduler_message(status: Message, tables: Sections):
    """Route one scheduler monitor message to the UI sections.

    StateScheduler refreshes the scheduler/worker summaries; StateWorker and StateTask
    are fanned out to every section; StateBalanceAdvice is only logged; any other
    message type is logged as unhandled.
    """
    if isinstance(status, StateScheduler):
        __update_scheduler_state(status, tables)
        return

    # fan-out order matches the original call sequence
    receivers = (
        tables.scheduler_section,
        tables.workers_section,
        tables.task_stream_section,
        tables.memory_usage_section,
        tables.tasklog_section,
        tables.worker_processors,
        tables.settings_section,
    )

    if isinstance(status, StateWorker):
        logging.info(f"Received StateWorker update for worker {status.worker_id.decode()} with {status.state.name}")
        for receiver in receivers:
            receiver.handle_worker_state(status)
        return

    if isinstance(status, StateTask):
        logging.debug(f"Received StateTask update for task {status.task_id.hex()} with {status.state.name}")
        for receiver in receivers:
            receiver.handle_task_state(status)
        return

    if isinstance(status, StateBalanceAdvice):
        logging.debug(f"Received StateBalanceAdvice for {status.worker_id.decode()} with {len(status.task_ids)} tasks")
        return

    logging.info(f"Unhandled message received: {type(status)}")
58
+
59
+
60
def __update_scheduler_state(data: StateScheduler, tables: Sections):
    """Apply a StateScheduler snapshot: scheduler figures, per-worker rows, and removal of vanished workers."""
    scheduler_view = tables.scheduler_section
    scheduler_view.cpu = format_percentage(data.scheduler.cpu)
    scheduler_view.rss = format_bytes(data.scheduler.rss)
    scheduler_view.rss_free = format_bytes(data.rss_free)

    workers_view = tables.workers_section
    known_before = set(workers_view.workers.keys())
    reported_now = {reported.worker_id.decode() for reported in data.worker_manager.workers}

    for reported in data.worker_manager.workers:
        # presumably `workers` creates a row on first access to an unknown name — verify in WorkersSection
        workers_view.workers[reported.worker_id.decode()].populate(reported)

    # workers that were displayed before but are absent from this snapshot
    for vanished in known_before - reported_now:
        workers_view.workers.pop(vanished)
        tables.worker_processors.remove_worker(vanished)
        tables.task_stream_section.mark_dead_worker(vanished)

    # redraw the workers table only when the set of workers changed
    if known_before != reported_now:
        workers_view.draw_section.refresh()

    tables.worker_processors.update_data(data.worker_manager.workers)
@@ -0,0 +1,104 @@
1
+ import dataclasses
2
+ from threading import Lock
3
+ from typing import Dict, List, Optional
4
+
5
+ from nicegui import ui
6
+ from nicegui.element import Element
7
+
8
+ from scaler.protocol.python.common import WorkerState
9
+ from scaler.protocol.python.message import StateTask, StateWorker
10
+ from scaler.protocol.python.status import ProcessorStatus, WorkerStatus
11
+ from scaler.ui.common.utility import format_worker_name
12
+
13
+
14
@dataclasses.dataclass
class WorkerProcessors:
    """Per-worker processor tables shown on the "Worker Processors" tab."""

    # worker name -> table holding that worker's processor statuses
    workers: Dict[str, "WorkerProcessorTable"] = dataclasses.field(default_factory=dict)
    # One lock per instance. A plain `Lock()` default is evaluated once at class
    # definition and would be shared by every WorkerProcessors object. Excluded from
    # comparison so equality keeps depending on `workers` only.
    _lock: Lock = dataclasses.field(default_factory=Lock, compare=False)

    @ui.refreshable
    def draw_section(self):
        """Draw one table per known worker."""
        with self._lock:
            for processor_table in self.workers.values():
                processor_table.draw_table()

    def update_data(self, data: List[WorkerStatus]):
        """Create a table for each newly seen worker and refresh the tables of known ones."""
        with self._lock:
            for worker in data:
                worker_name = worker.worker_id.decode()
                processor_table = self.workers.get(worker_name)

                if processor_table is None:
                    self.workers[worker_name] = WorkerProcessorTable(
                        worker_name, 0, worker.rss_free, worker.processor_statuses
                    )
                    continue

                # keep the free-memory figure current; draw_row() derives the knob maximum from it
                processor_table.rss_free = worker.rss_free
                if processor_table.processor_statuses != worker.processor_statuses:
                    processor_table.processor_statuses = worker.processor_statuses

    def remove_worker(self, dead_worker: str):
        """Forget *dead_worker*; unknown names are ignored."""
        with self._lock:
            self.workers.pop(dead_worker, None)

    def handle_task_state(self, _: StateTask):
        """Task state updates do not affect the processor tables."""
        return

    def handle_worker_state(self, state_worker: StateWorker):
        """Drop the worker's table when it reports the Disconnected state."""
        if state_worker.state == WorkerState.Disconnected:
            self.remove_worker(state_worker.worker_id.decode())
50
+
51
+
52
@dataclasses.dataclass
class WorkerProcessorTable:
    """Card showing one row per processor of a single worker."""

    worker_name: str
    # highest processor RSS seen so far, raised while drawing
    rss_max: int
    rss_free: int
    processor_statuses: List[ProcessorStatus]

    # card element created by the most recent draw_table() call
    handler: Optional[Element] = None

    def draw_table(self):
        """Draw the worker card: a heading, the column titles, then one row per processor ordered by PID."""
        heading_name = format_worker_name(self.worker_name)

        with ui.card().classes("w-full") as card:
            self.handler = card

            ui.markdown(f"Worker **{heading_name}**").classes("text-xl")

            with ui.grid(columns=7).classes("w-full"):
                self.draw_titles()

                by_pid = sorted(self.processor_statuses, key=lambda status: status.pid)
                for status in by_pid:
                    # running maximum: rows drawn later see the peak of all rows before them
                    current_rss = status.resource.rss
                    if current_rss > self.rss_max:
                        self.rss_max = current_rss

                    self.draw_row(status, self.rss_free, self.rss_max)

    @staticmethod
    def draw_titles():
        """Emit the seven column header labels."""
        for title in (
            "Processor PID",
            "CPU %",
            "RSS (in MB)",
            "Max RSS (in MB)",
            "Initialized",
            "Has Task",
            "Suspended",
        ):
            ui.label(title)

    @staticmethod
    def draw_row(processor_status: ProcessorStatus, rss_free: int, rss_max: int):
        """Emit the seven cells for one processor.

        Byte counts are divided by 1e6 and truncated to integers before display; the
        CPU figure is divided by 10 (presumably reported in tenths of a percent — confirm).
        """
        cpu_value = processor_status.resource.cpu / 10
        rss_mb = int(processor_status.resource.rss / 1e6)
        rss_max_mb = int(rss_max / 1e6)
        rss_free_mb = int(rss_free / 1e6)
        knob_ceiling = rss_mb + rss_free_mb

        ui.label(str(processor_status.pid))
        ui.knob(value=cpu_value, track_color="grey-2", show_value=True, min=0, max=100)
        ui.knob(value=rss_mb, track_color="grey-2", show_value=True, min=0, max=knob_ceiling)
        ui.knob(value=rss_max_mb, track_color="grey-2", show_value=True, min=0, max=knob_ceiling)
        for flag in ("initialized", "has_task", "suspended"):
            ui.checkbox().bind_value_from(processor_status, flag)

    def delete_row(self):
        """Clear and delete the card created by draw_table(); the table must have been drawn first."""
        assert self.handler is not None
        card = self.handler
        card.clear()
        card.delete()
scaler/ui/v1.py ADDED
@@ -0,0 +1,76 @@
1
+ import threading
2
+ from functools import partial
3
+
4
+ from nicegui import ui
5
+
6
+ from scaler.config.section.webui import WebUIConfig
7
+ from scaler.io.sync_subscriber import ZMQSyncSubscriber
8
+ from scaler.ui.common.constants import (
9
+ MEMORY_USAGE_UPDATE_INTERVAL,
10
+ TASK_LOG_REFRESH_INTERVAL,
11
+ TASK_STREAM_UPDATE_INTERVAL,
12
+ WORKER_PROCESSORS_REFRESH_INTERVAL,
13
+ )
14
+ from scaler.ui.common.live_display import SchedulerSection, WorkersSection
15
+ from scaler.ui.common.memory_window import MemoryChart
16
+ from scaler.ui.common.setting_page import Settings
17
+ from scaler.ui.common.task_graph import TaskStream
18
+ from scaler.ui.common.task_log import TaskLogTable
19
+ from scaler.ui.common.webui import Sections, process_scheduler_message
20
+ from scaler.ui.common.worker_processors import WorkerProcessors
21
+
22
+
23
def start_webui_v1(config: WebUIConfig):
    """Build the tabbed UI once, subscribe to the scheduler monitor address, and serve until the UI thread exits."""
    sections = Sections(
        scheduler_section=SchedulerSection(),
        workers_section=WorkersSection(),
        task_stream_section=TaskStream(),
        memory_usage_section=MemoryChart(),
        tasklog_section=TaskLogTable(),
        worker_processors=WorkerProcessors(),
        settings_section=Settings(),
    )

    with ui.tabs().classes("w-full h-full") as tab_bar:
        live_tab = ui.tab("Live")
        tasklog_tab = ui.tab("Task Log")
        stream_tab = ui.tab("Worker Task Stream")
        worker_processors_tab = ui.tab("Worker Processors")
        settings_tab = ui.tab("Settings")

    with ui.tab_panels(tab_bar, value=live_tab).classes("w-full"):
        with ui.tab_panel(live_tab):
            sections.scheduler_section.draw_section()
            sections.workers_section.draw_section()  # type: ignore[call-arg]

        with ui.tab_panel(tasklog_tab):
            task_log = sections.tasklog_section
            task_log.draw_section()  # type: ignore[call-arg]
            ui.timer(TASK_LOG_REFRESH_INTERVAL, task_log.draw_section.refresh, active=True)

        with ui.tab_panel(stream_tab):
            task_stream = sections.task_stream_section
            task_stream.setup_task_stream(sections.settings_section)
            ui.timer(TASK_STREAM_UPDATE_INTERVAL, task_stream.update_plot, active=True)

            memory_chart = sections.memory_usage_section
            memory_chart.setup_memory_chart(sections.settings_section)
            ui.timer(MEMORY_USAGE_UPDATE_INTERVAL, memory_chart.update_plot, active=True)

        with ui.tab_panel(worker_processors_tab):
            processors = sections.worker_processors
            processors.draw_section()  # type: ignore[call-arg]
            ui.timer(WORKER_PROCESSORS_REFRESH_INTERVAL, processors.draw_section.refresh, active=True)

        with ui.tab_panel(settings_tab):
            sections.settings_section.draw_section()

    # every monitor message is routed into the sections built above
    on_message = partial(process_scheduler_message, tables=sections)
    subscriber = ZMQSyncSubscriber(
        address=config.monitor_address, callback=on_message, topic=b"", timeout_seconds=-1
    )
    subscriber.start()

    # serve the UI from a non-daemon thread and block until it finishes
    server_thread = threading.Thread(
        target=ui.run, kwargs={"host": config.web_host, "port": config.web_port, "reload": False}, daemon=False
    )
    server_thread.start()
    server_thread.join()
scaler/ui/v2.py ADDED
@@ -0,0 +1,102 @@
1
+ import threading
2
+ from typing import Optional
3
+
4
+ from nicegui import Event, app, ui # type: ignore[attr-defined]
5
+
6
+ from scaler.config.section.webui import WebUIConfig
7
+ from scaler.io.sync_subscriber import ZMQSyncSubscriber
8
+ from scaler.protocol.python.mixins import Message
9
+ from scaler.ui.common.constants import (
10
+ MEMORY_USAGE_UPDATE_INTERVAL,
11
+ TASK_LOG_REFRESH_INTERVAL,
12
+ TASK_STREAM_UPDATE_INTERVAL,
13
+ WORKER_PROCESSORS_REFRESH_INTERVAL,
14
+ )
15
+ from scaler.ui.common.live_display import SchedulerSection, WorkersSection
16
+ from scaler.ui.common.memory_window import MemoryChart
17
+ from scaler.ui.common.setting_page import Settings
18
+ from scaler.ui.common.task_graph import TaskStream
19
+ from scaler.ui.common.task_log import TaskLogTable
20
+ from scaler.ui.common.webui import Sections, process_scheduler_message
21
+ from scaler.ui.common.worker_processors import WorkerProcessors
22
+
23
+
24
class WebUI:
    """NiceGUI v2 front end: builds a set of sections per visitor and feeds them scheduler messages."""

    def __init__(self) -> None:
        # event used to hand messages from the subscriber callback to the UI handlers
        self.scheduler_message = Event[Message]()
        # sections of the most recent visitor (kept for handle_message / external readers)
        self.tables: Optional[Sections] = None

    def start(self, host: str, port: int) -> None:
        """Start the NiceGUI server in a separate daemon thread.

        :raises RuntimeError: if the app's startup hook has not fired within 3 seconds
        """
        started = threading.Event()
        app.on_startup(started.set)
        thread = threading.Thread(
            target=lambda: ui.run(self.root, host=host, port=port, reload=False),  # type: ignore[misc,arg-type]
            daemon=True,
        )
        thread.start()
        if not started.wait(timeout=3.0):
            raise RuntimeError("NiceGUI did not start within 3 seconds.")

    def root(self) -> None:
        """Create the UI for each new visitor."""
        tables = Sections(
            scheduler_section=SchedulerSection(),
            workers_section=WorkersSection(),
            task_stream_section=TaskStream(),
            memory_usage_section=MemoryChart(),
            tasklog_section=TaskLogTable(),
            worker_processors=WorkerProcessors(),
            settings_section=Settings(),
        )
        self.tables = tables

        # Bind the subscription to THIS visitor's sections. Subscribing the shared
        # handle_message would route every visitor's callback to `self.tables`, which
        # is overwritten on each visit: earlier visitors would stop updating and the
        # newest one would process each message once per subscriber.
        def dispatch_to_visitor(status: Message) -> None:
            process_scheduler_message(status, tables)

        self.scheduler_message.subscribe(dispatch_to_visitor)

        with ui.tabs().classes("w-full h-full") as tabs:
            live_tab = ui.tab("Live")
            tasklog_tab = ui.tab("Task Log")
            stream_tab = ui.tab("Worker Task Stream")
            worker_processors_tab = ui.tab("Worker Processors")
            settings_tab = ui.tab("Settings")

        with ui.tab_panels(tabs, value=live_tab).classes("w-full"):
            with ui.tab_panel(live_tab):
                tables.scheduler_section.draw_section()
                tables.workers_section.draw_section()  # type: ignore[call-arg]

            with ui.tab_panel(tasklog_tab):
                tables.tasklog_section.draw_section()  # type: ignore[call-arg]
                ui.timer(TASK_LOG_REFRESH_INTERVAL, tables.tasklog_section.draw_section.refresh, active=True)

            with ui.tab_panel(stream_tab):
                tables.task_stream_section.setup_task_stream(tables.settings_section)
                ui.timer(TASK_STREAM_UPDATE_INTERVAL, tables.task_stream_section.update_plot, active=True)

                tables.memory_usage_section.setup_memory_chart(tables.settings_section)
                ui.timer(MEMORY_USAGE_UPDATE_INTERVAL, tables.memory_usage_section.update_plot, active=True)

            with ui.tab_panel(worker_processors_tab):
                tables.worker_processors.draw_section()  # type: ignore[call-arg]
                ui.timer(WORKER_PROCESSORS_REFRESH_INTERVAL, tables.worker_processors.draw_section.refresh, active=True)

            with ui.tab_panel(settings_tab):
                tables.settings_section.draw_section()

    def new_message(self, status: Message):
        """Subscriber callback: publish *status* to every subscribed visitor handler."""
        self.scheduler_message.emit(status)

    def handle_message(self, status: Message):
        """Apply *status* to the most recent visitor's sections; a no-op before the first visit."""
        if self.tables is None:
            return
        process_scheduler_message(status, self.tables)
90
+
91
+
92
def start_webui_v2(config: WebUIConfig):
    """Start the v2 web UI and the scheduler monitor subscriber, then keep the process alive."""
    import time  # function-scope import: only needed for the idle loop below

    webui = WebUI()
    webui.start(config.web_host, config.web_port)

    subscriber = ZMQSyncSubscriber(
        address=config.monitor_address, callback=webui.new_message, topic=b"", timeout_seconds=-1
    )
    subscriber.start()

    # WebUI.start() serves from a daemon thread, so this thread must not return.
    # Sleep instead of spinning (`while True: pass`), which pins a CPU core at 100%.
    while True:
        time.sleep(1.0)
scaler/ui/webui.py ADDED
@@ -0,0 +1,21 @@
1
+ import logging
2
+
3
+ from scaler.config.section.webui import WebUIConfig
4
+ from scaler.ui.util import NICEGUI_MAJOR_VERSION
5
+ from scaler.utility.logging.utility import setup_logger
6
+
7
+
8
def start_webui(config: WebUIConfig):
    """Configure logging, then launch the UI implementation matching the installed NiceGUI major version."""
    log_settings = config.logging_config
    setup_logger(log_settings.paths, log_settings.config_file, log_settings.level)

    if NICEGUI_MAJOR_VERSION >= 3:
        logging.info(f"Detected {NICEGUI_MAJOR_VERSION}. Using GUI v2.")
        # imported lazily so only the implementation in use is loaded
        from scaler.ui.v2 import start_webui_v2

        start_webui_v2(config)
        return

    logging.info(f"Detected {NICEGUI_MAJOR_VERSION}. Using GUI v1.")
    from scaler.ui.v1 import start_webui_v1

    start_webui_v1(config)
File without changes
@@ -0,0 +1,19 @@
1
+ import functools
2
+ import pdb
3
+ import sys
4
+ from typing import Callable
5
+
6
+
7
def pdb_wrapped(func: Callable):
    """Decorate an entry point so that an uncaught exception opens a pdb post-mortem session.

    The wrapper calls *func* and exits the process with its return value via
    ``sys.exit``. If *func* raises an ``Exception``, the debugger is opened on the
    failing traceback and the exception is re-raised afterwards.
    """

    @functools.wraps(func)
    def pdb_wrapper(*args, **kwargs):
        try:
            exit_code = func(*args, **kwargs)
        except Exception as error:
            pdb.post_mortem(error.__traceback__)
            raise

        # outside the try: SystemExit is not an Exception, so it was never intercepted anyway
        sys.exit(exit_code)

    return pdb_wrapper
@@ -0,0 +1,63 @@
1
+ import collections
2
+ from typing import Callable
3
+
4
+
5
class EventList(collections.UserList):
    """A list that emits events when it is modified.

    Callbacks registered with add_update_callback() are invoked, in registration
    order, with the list itself after every in-place mutation.
    """

    def __init__(self, initlist=None):
        super().__init__(initlist=initlist)
        self._callbacks = []

    def add_update_callback(self, callback: Callable[["EventList"], None]):
        """Register *callback* to be called with this list after each modification."""
        self._callbacks.append(callback)

    def __setitem__(self, i, item):
        super().__setitem__(i, item)
        self._list_updated()

    def __delitem__(self, i):
        super().__delitem__(i)
        self._list_updated()

    def __add__(self, other):
        # `a + b` builds a NEW list and leaves `a` untouched: return it (the result
        # was previously dropped, so the expression evaluated to None) and do not
        # notify, because this list was not modified.
        return super().__add__(other)

    def __iadd__(self, other):
        super().__iadd__(other)
        self._list_updated()
        return self

    def __imul__(self, n):
        super().__imul__(n)
        self._list_updated()
        return self

    def append(self, item):
        super().append(item)
        self._list_updated()

    def insert(self, i, item):
        super().insert(i, item)
        self._list_updated()

    def pop(self, i: int = -1):
        v = super().pop(i)
        self._list_updated()
        return v

    def remove(self, item):
        super().remove(item)
        self._list_updated()

    def clear(self) -> None:
        super().clear()
        self._list_updated()

    def sort(self, /, *args, **kwargs):
        super().sort(*args, **kwargs)
        self._list_updated()

    def reverse(self) -> None:
        super().reverse()
        self._list_updated()

    def extend(self, other) -> None:
        super().extend(other)
        self._list_updated()

    def _list_updated(self):
        """Invoke every registered callback with this list."""
        for callback in self._callbacks:
            callback(self)