opengris-scaler 1.12.28__cp313-cp313-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of opengris-scaler might be problematic. Click here for more details.

Files changed (187) hide show
  1. opengris_scaler-1.12.28.dist-info/METADATA +728 -0
  2. opengris_scaler-1.12.28.dist-info/RECORD +187 -0
  3. opengris_scaler-1.12.28.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.28.dist-info/entry_points.txt +10 -0
  5. opengris_scaler-1.12.28.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.28.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.28.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/__init__.py +14 -0
  13. scaler/about.py +5 -0
  14. scaler/client/__init__.py +0 -0
  15. scaler/client/agent/__init__.py +0 -0
  16. scaler/client/agent/client_agent.py +210 -0
  17. scaler/client/agent/disconnect_manager.py +27 -0
  18. scaler/client/agent/future_manager.py +112 -0
  19. scaler/client/agent/heartbeat_manager.py +74 -0
  20. scaler/client/agent/mixins.py +89 -0
  21. scaler/client/agent/object_manager.py +98 -0
  22. scaler/client/agent/task_manager.py +64 -0
  23. scaler/client/client.py +658 -0
  24. scaler/client/future.py +252 -0
  25. scaler/client/object_buffer.py +129 -0
  26. scaler/client/object_reference.py +25 -0
  27. scaler/client/serializer/__init__.py +0 -0
  28. scaler/client/serializer/default.py +16 -0
  29. scaler/client/serializer/mixins.py +38 -0
  30. scaler/cluster/__init__.py +0 -0
  31. scaler/cluster/cluster.py +115 -0
  32. scaler/cluster/combo.py +150 -0
  33. scaler/cluster/object_storage_server.py +45 -0
  34. scaler/cluster/scheduler.py +86 -0
  35. scaler/config/__init__.py +0 -0
  36. scaler/config/defaults.py +94 -0
  37. scaler/config/loader.py +96 -0
  38. scaler/config/mixins.py +20 -0
  39. scaler/config/section/__init__.py +0 -0
  40. scaler/config/section/cluster.py +55 -0
  41. scaler/config/section/ecs_worker_adapter.py +85 -0
  42. scaler/config/section/native_worker_adapter.py +43 -0
  43. scaler/config/section/object_storage_server.py +8 -0
  44. scaler/config/section/scheduler.py +54 -0
  45. scaler/config/section/symphony_worker_adapter.py +47 -0
  46. scaler/config/section/top.py +13 -0
  47. scaler/config/section/webui.py +21 -0
  48. scaler/config/types/__init__.py +0 -0
  49. scaler/config/types/network_backend.py +12 -0
  50. scaler/config/types/object_storage_server.py +45 -0
  51. scaler/config/types/worker.py +62 -0
  52. scaler/config/types/zmq.py +83 -0
  53. scaler/entry_points/__init__.py +0 -0
  54. scaler/entry_points/cluster.py +133 -0
  55. scaler/entry_points/object_storage_server.py +45 -0
  56. scaler/entry_points/scheduler.py +144 -0
  57. scaler/entry_points/top.py +286 -0
  58. scaler/entry_points/webui.py +48 -0
  59. scaler/entry_points/worker_adapter_ecs.py +191 -0
  60. scaler/entry_points/worker_adapter_native.py +137 -0
  61. scaler/entry_points/worker_adapter_symphony.py +98 -0
  62. scaler/io/__init__.py +0 -0
  63. scaler/io/async_binder.py +89 -0
  64. scaler/io/async_connector.py +95 -0
  65. scaler/io/async_object_storage_connector.py +225 -0
  66. scaler/io/mixins.py +154 -0
  67. scaler/io/sync_connector.py +68 -0
  68. scaler/io/sync_object_storage_connector.py +247 -0
  69. scaler/io/sync_subscriber.py +83 -0
  70. scaler/io/utility.py +80 -0
  71. scaler/io/ymq/__init__.py +0 -0
  72. scaler/io/ymq/_ymq.pyi +95 -0
  73. scaler/io/ymq/ymq.py +138 -0
  74. scaler/io/ymq_async_object_storage_connector.py +184 -0
  75. scaler/io/ymq_sync_object_storage_connector.py +184 -0
  76. scaler/object_storage/__init__.py +0 -0
  77. scaler/protocol/__init__.py +0 -0
  78. scaler/protocol/capnp/__init__.py +0 -0
  79. scaler/protocol/capnp/_python.py +6 -0
  80. scaler/protocol/capnp/common.capnp +68 -0
  81. scaler/protocol/capnp/message.capnp +218 -0
  82. scaler/protocol/capnp/object_storage.capnp +57 -0
  83. scaler/protocol/capnp/status.capnp +73 -0
  84. scaler/protocol/introduction.md +105 -0
  85. scaler/protocol/python/__init__.py +0 -0
  86. scaler/protocol/python/common.py +140 -0
  87. scaler/protocol/python/message.py +751 -0
  88. scaler/protocol/python/mixins.py +13 -0
  89. scaler/protocol/python/object_storage.py +118 -0
  90. scaler/protocol/python/status.py +279 -0
  91. scaler/protocol/worker.md +228 -0
  92. scaler/scheduler/__init__.py +0 -0
  93. scaler/scheduler/allocate_policy/__init__.py +0 -0
  94. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  95. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  96. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  97. scaler/scheduler/allocate_policy/mixins.py +55 -0
  98. scaler/scheduler/controllers/__init__.py +0 -0
  99. scaler/scheduler/controllers/balance_controller.py +65 -0
  100. scaler/scheduler/controllers/client_controller.py +131 -0
  101. scaler/scheduler/controllers/config_controller.py +31 -0
  102. scaler/scheduler/controllers/graph_controller.py +424 -0
  103. scaler/scheduler/controllers/information_controller.py +81 -0
  104. scaler/scheduler/controllers/mixins.py +194 -0
  105. scaler/scheduler/controllers/object_controller.py +147 -0
  106. scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
  107. scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
  108. scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
  109. scaler/scheduler/controllers/scaling_policies/null.py +14 -0
  110. scaler/scheduler/controllers/scaling_policies/types.py +9 -0
  111. scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
  112. scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
  113. scaler/scheduler/controllers/task_controller.py +376 -0
  114. scaler/scheduler/controllers/worker_controller.py +169 -0
  115. scaler/scheduler/object_usage/__init__.py +0 -0
  116. scaler/scheduler/object_usage/object_tracker.py +131 -0
  117. scaler/scheduler/scheduler.py +251 -0
  118. scaler/scheduler/task/__init__.py +0 -0
  119. scaler/scheduler/task/task_state_machine.py +92 -0
  120. scaler/scheduler/task/task_state_manager.py +61 -0
  121. scaler/ui/__init__.py +0 -0
  122. scaler/ui/constants.py +9 -0
  123. scaler/ui/live_display.py +147 -0
  124. scaler/ui/memory_window.py +146 -0
  125. scaler/ui/setting_page.py +40 -0
  126. scaler/ui/task_graph.py +832 -0
  127. scaler/ui/task_log.py +107 -0
  128. scaler/ui/utility.py +66 -0
  129. scaler/ui/webui.py +147 -0
  130. scaler/ui/worker_processors.py +104 -0
  131. scaler/utility/__init__.py +0 -0
  132. scaler/utility/debug.py +19 -0
  133. scaler/utility/event_list.py +63 -0
  134. scaler/utility/event_loop.py +58 -0
  135. scaler/utility/exceptions.py +42 -0
  136. scaler/utility/formatter.py +44 -0
  137. scaler/utility/graph/__init__.py +0 -0
  138. scaler/utility/graph/optimization.py +27 -0
  139. scaler/utility/graph/topological_sorter.py +11 -0
  140. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  141. scaler/utility/identifiers.py +107 -0
  142. scaler/utility/logging/__init__.py +0 -0
  143. scaler/utility/logging/decorators.py +25 -0
  144. scaler/utility/logging/scoped_logger.py +33 -0
  145. scaler/utility/logging/utility.py +183 -0
  146. scaler/utility/many_to_many_dict.py +123 -0
  147. scaler/utility/metadata/__init__.py +0 -0
  148. scaler/utility/metadata/profile_result.py +31 -0
  149. scaler/utility/metadata/task_flags.py +30 -0
  150. scaler/utility/mixins.py +13 -0
  151. scaler/utility/network_util.py +7 -0
  152. scaler/utility/one_to_many_dict.py +72 -0
  153. scaler/utility/queues/__init__.py +0 -0
  154. scaler/utility/queues/async_indexed_queue.py +37 -0
  155. scaler/utility/queues/async_priority_queue.py +70 -0
  156. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  157. scaler/utility/queues/indexed_queue.py +114 -0
  158. scaler/utility/serialization.py +9 -0
  159. scaler/version.txt +1 -0
  160. scaler/worker/__init__.py +0 -0
  161. scaler/worker/agent/__init__.py +0 -0
  162. scaler/worker/agent/heartbeat_manager.py +107 -0
  163. scaler/worker/agent/mixins.py +137 -0
  164. scaler/worker/agent/processor/__init__.py +0 -0
  165. scaler/worker/agent/processor/object_cache.py +107 -0
  166. scaler/worker/agent/processor/processor.py +285 -0
  167. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  168. scaler/worker/agent/processor_holder.py +147 -0
  169. scaler/worker/agent/processor_manager.py +369 -0
  170. scaler/worker/agent/profiling_manager.py +109 -0
  171. scaler/worker/agent/task_manager.py +150 -0
  172. scaler/worker/agent/timeout_manager.py +19 -0
  173. scaler/worker/preload.py +84 -0
  174. scaler/worker/worker.py +265 -0
  175. scaler/worker_adapter/__init__.py +0 -0
  176. scaler/worker_adapter/common.py +26 -0
  177. scaler/worker_adapter/ecs.py +269 -0
  178. scaler/worker_adapter/native.py +155 -0
  179. scaler/worker_adapter/symphony/__init__.py +0 -0
  180. scaler/worker_adapter/symphony/callback.py +45 -0
  181. scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
  182. scaler/worker_adapter/symphony/message.py +24 -0
  183. scaler/worker_adapter/symphony/task_manager.py +289 -0
  184. scaler/worker_adapter/symphony/worker.py +204 -0
  185. scaler/worker_adapter/symphony/worker_adapter.py +139 -0
  186. src/scaler/io/ymq/_ymq.so +0 -0
  187. src/scaler/object_storage/object_storage_server.so +0 -0
scaler/ui/task_log.py ADDED
@@ -0,0 +1,107 @@
1
+ import dataclasses
2
+ from collections import deque
3
+ from threading import Lock
4
+ from typing import Deque, Dict, Optional
5
+
6
+ from nicegui import ui
7
+
8
+ from scaler.protocol.python.common import TaskState
9
+ from scaler.protocol.python.message import StateTask, StateWorker
10
+ from scaler.ui.utility import COMPLETED_TASK_STATUSES, display_capabilities
11
+ from scaler.utility.formatter import format_bytes
12
+ from scaler.utility.metadata.profile_result import ProfileResult
13
+
14
# HTML for a task-id cell: an ellipsised <span> that shows the full id as a tooltip and
# copies it to the clipboard when clicked. ``{task}`` is filled in with ``str.format``.
TASK_ID_HTML_TEMPLATE = "".join(
    [
        "<span style='",
        "display:inline-block;max-width:12rem;overflow:hidden;text-overflow:ellipsis;",
        "white-space:nowrap;cursor:pointer;font:inherit;color:inherit",
        "' title='{task}'",
        " onclick=\"navigator.clipboard.writeText('{task}')\"",
        ">{task}</span>",
    ]
)
20
+
21
+
22
@dataclasses.dataclass
class TaskData:
    """Display-ready values for one row of the task log grid (every column is text)."""

    task: str = ""
    function: str = ""
    duration: str = ""
    peak_mem: str = ""
    status: str = ""
    capabilities: str = ""

    def populate(
        self,
        state: StateTask,
        function_name: str,
        profiling_data: Optional[ProfileResult],
        task_capabilities: Dict[str, int],
    ):
        """Fill every column from a task state message.

        :param state: message providing the task id and the state whose name is shown.
        :param function_name: text for the "Function" column.
        :param profiling_data: duration / peak memory source; when falsy both columns read "N/A".
        :param task_capabilities: mapping whose keys are rendered in the "Capabilities" column.
        """
        self.task = state.task_id.hex()
        self.function = function_name
        self.status = state.state.name

        if not profiling_data:
            self.duration = "N/A"
            self.peak_mem = "N/A"
        else:
            peak = profiling_data.memory_peak
            self.duration = f"{profiling_data.duration_s:.2f}s"
            # A peak of exactly zero is shown as a bare "0" rather than a formatted size.
            self.peak_mem = "0" if peak == 0 else format_bytes(peak)

        self.capabilities = display_capabilities(set(task_capabilities))

    def draw_row(self):
        """Emit the six UI elements of this row, in column order."""
        if self.status == TaskState.Success.name:
            status_style = "color: green"
        else:
            status_style = "color: red"

        ui.html(TASK_ID_HTML_TEMPLATE.format(task=self.task))
        ui.label(self.function)
        ui.label(self.duration)
        ui.label(self.peak_mem)
        ui.label(self.status).style(status_style)
        ui.label(self.capabilities)

    @staticmethod
    def draw_titles():
        """Emit the header labels, one per column."""
        for title in ("Task ID", "Function", "Duration", "Peak mem", "Status", "Capabilities"):
            ui.label(title)
70
+
71
+
72
class TaskLogTable:
    """Rolling log of the most recently completed tasks, rendered as a refreshable grid."""

    def __init__(self):
        # Newest rows are pushed on the left; the deque silently drops rows beyond 100.
        self._task_log: Deque[TaskData] = deque(maxlen=100)
        # Function names seen on earlier messages of tasks that have not completed yet.
        self._task_id_to_function_name: Dict[str, str] = {}
        self._lock: Lock = Lock()

    def handle_task_state(self, state_task: StateTask):
        """Remember the task's function name and, once it reaches a completed state, log a row.

        :param state_task: task state message; only states in ``COMPLETED_TASK_STATUSES``
            produce a row.
        """
        task_id = state_task.task_id.hex()
        message_name = state_task.function_name.decode()

        if message_name != "" and task_id not in self._task_id_to_function_name:
            self._task_id_to_function_name[task_id] = message_name

        if state_task.state not in COMPLETED_TASK_STATUSES:
            return

        # Always drop the cached name once the task is finished. Previously the entry was
        # only removed when the final message had an empty name, so the cache grew forever
        # for tasks whose final message carried one.
        cached_name = self._task_id_to_function_name.pop(task_id, "")
        function_name = message_name if message_name != "" else cached_name

        # Canceled/failed states don't have profiling metadata
        profiling_data = ProfileResult.deserialize(state_task.metadata) if state_task.metadata != b"" else None

        row = TaskData()
        row.populate(state_task, function_name, profiling_data, state_task.capabilities)

        with self._lock:
            self._task_log.appendleft(row)

    def handle_worker_state(self, _: StateWorker):
        """Worker state messages are ignored by the task log."""
        return

    @ui.refreshable
    def draw_section(self):
        """Draw the header and every logged row inside a six-column grid."""
        with self._lock:
            with ui.card().classes("w-full q-mx-auto"), ui.grid(columns=6).classes("q-mx-auto"):
                TaskData.draw_titles()
                for task in self._task_log:
                    task.draw_row()
scaler/ui/utility.py ADDED
@@ -0,0 +1,66 @@
1
+ import datetime
2
+ from typing import List, Set, Tuple
3
+
4
+ from scaler.protocol.python.common import TaskState
5
+ from scaler.ui.setting_page import Settings
6
+
7
# Task states grouped as "completed": the success state plus the cancel and failure variants.
COMPLETED_TASK_STATUSES = {
    TaskState.Success,
    TaskState.Canceled, TaskState.CanceledNotFound,
    TaskState.Failed, TaskState.FailedWorkerDied,
}
14
+
15
+
16
def format_timediff(a: datetime.datetime, b: datetime.datetime) -> float:
    """Return the number of seconds from ``a`` to ``b`` (negative when ``b`` precedes ``a``)."""
    elapsed = b - a
    return elapsed.total_seconds()
18
+
19
+
20
def format_worker_name(worker_name: str, cutoff: int = 15) -> str:
    """Shorten ``worker_name`` to ``cutoff`` characters, appending "+" when anything was cut."""
    if len(worker_name) <= cutoff:
        return worker_name

    return f"{worker_name[:cutoff]}+"
22
+
23
+
24
def get_bounds(now: datetime.datetime, start_time: datetime.datetime, settings: Settings) -> Tuple[int, int]:
    """Return the ``(lower, upper)`` bounds, in whole seconds since ``start_time``, of the stream window.

    Until a full window has elapsed the bounds are ``(0, window)``; afterwards they are the
    trailing window ending at ``now``.

    :param now: current time.
    :param start_time: time the bounds are measured from.
    :param settings: object whose ``stream_window`` attribute is a ``timedelta``.
    """
    elapsed = now - start_time
    window = settings.stream_window

    # ``timedelta.seconds`` is only the seconds component (0..86399) and wraps after one day;
    # ``total_seconds()`` is the real length of the interval.
    elapsed_seconds = int(elapsed.total_seconds())
    window_seconds = int(window.total_seconds())

    if elapsed_seconds <= window_seconds:
        return 0, window_seconds

    return int((elapsed - window).total_seconds()), elapsed_seconds
32
+
33
+
34
def make_taskstream_ticks(lower_bound: int, upper_bound: int) -> List[int]:
    """Return tick positions from ``lower_bound`` to ``upper_bound`` in steps of one sixth of the span.

    :param lower_bound: first tick.
    :param upper_bound: last value a tick may take (inclusive).
    """
    # A span shorter than 6 makes the integer step 0, which ``range`` rejects with
    # ValueError; fall back to a step of 1 in that case.
    distance = (upper_bound - lower_bound) // 6 or 1
    return list(range(lower_bound, upper_bound + 1, distance))
37
+
38
+
39
def make_memory_ticks(max_bytes: int) -> Tuple[List[int], List[str]]:
    """Return tick values and labels at 0 and at each power of 1024 up to the data maximum.

    The axis always reaches at least 1GB, and never goes past the last known unit (TB).

    :param max_bytes: largest value that has to fit on the axis.
    :return: ``(values, labels)`` lists of equal length.
    """
    # ensure at least up to 1GB on empty data
    ceiling = max(1024 * 1024 * 1024, max_bytes)

    vals: List[int] = [0]
    texts: List[str] = ["0"]
    for exponent, unit in enumerate(("B", "KB", "MB", "GB", "TB")):
        tick = 1024**exponent
        if tick > ceiling:
            break
        vals.append(tick)
        texts.append(f"1{unit}")

    return vals, texts
53
+
54
+
55
def make_tick_text(window_length: int) -> List[int]:
    """Return tick labels counting from ``-window_length`` up to 0 in steps of one sixth of the window.

    :param window_length: length of the window the labels cover.
    """
    upper = 0
    lower = -1 * window_length
    # Windows shorter than 6 give an integer step of 0, which ``range`` rejects with
    # ValueError; fall back to a step of 1 in that case.
    distance = (upper - lower) // 6 or 1
    return list(range(lower, upper + 1, distance))
60
+
61
+
62
def display_capabilities(capabilities: Set[str]) -> str:
    """Render capability names sorted and joined by " & ", or a placeholder when there are none."""
    if capabilities:
        return " & ".join(sorted(capabilities))

    return "<no capabilities>"
scaler/ui/webui.py ADDED
@@ -0,0 +1,147 @@
1
+ import dataclasses
2
+ import logging
3
+ import threading
4
+ from functools import partial
5
+ from typing import Optional, Tuple
6
+
7
+ from nicegui import ui
8
+
9
+ from scaler.config.types.zmq import ZMQConfig
10
+ from scaler.io.sync_subscriber import ZMQSyncSubscriber
11
+ from scaler.protocol.python.message import StateBalanceAdvice, StateScheduler, StateTask, StateWorker
12
+ from scaler.protocol.python.mixins import Message
13
+ from scaler.ui.constants import (
14
+ MEMORY_USAGE_UPDATE_INTERVAL,
15
+ TASK_LOG_REFRESH_INTERVAL,
16
+ TASK_STREAM_UPDATE_INTERVAL,
17
+ WORKER_PROCESSORS_REFRESH_INTERVAL,
18
+ )
19
+ from scaler.ui.live_display import SchedulerSection, WorkersSection
20
+ from scaler.ui.memory_window import MemoryChart
21
+ from scaler.ui.setting_page import Settings
22
+ from scaler.ui.task_graph import TaskStream
23
+ from scaler.ui.task_log import TaskLogTable
24
+ from scaler.ui.worker_processors import WorkerProcessors
25
+ from scaler.utility.formatter import format_bytes, format_percentage
26
+ from scaler.utility.logging.utility import setup_logger
27
+
28
+
29
@dataclasses.dataclass
class Sections:
    """Bundle of the UI section objects that receive scheduler status updates."""

    # Sections drawn on the "Live" tab.
    scheduler_section: SchedulerSection
    workers_section: WorkersSection

    # Charts set up on the "Worker Task Stream" tab.
    task_stream_section: TaskStream
    memory_usage_section: MemoryChart

    # One section each for the "Task Log", "Worker Processors" and "Settings" tabs.
    tasklog_section: TaskLogTable
    worker_processors: WorkerProcessors
    settings_section: Settings
38
+
39
+
40
def start_webui(
    address: str,
    host: str,
    port: int,
    logging_paths: Tuple[str, ...],
    logging_config_file: Optional[str],
    logging_level: str,
):
    """Build the tabbed UI, subscribe to status messages and serve the UI until it stops.

    :param address: address string parsed with ``ZMQConfig.from_string`` for the subscriber.
    :param host: host passed to ``ui.run``.
    :param port: port passed to ``ui.run``.
    :param logging_paths: forwarded to ``setup_logger``.
    :param logging_config_file: forwarded to ``setup_logger``.
    :param logging_level: forwarded to ``setup_logger``.
    """
    setup_logger(logging_paths, logging_config_file, logging_level)

    sections = Sections(
        scheduler_section=SchedulerSection(),
        workers_section=WorkersSection(),
        task_stream_section=TaskStream(),
        memory_usage_section=MemoryChart(),
        tasklog_section=TaskLogTable(),
        worker_processors=WorkerProcessors(),
        settings_section=Settings(),
    )

    # Tab headers.
    with ui.tabs().classes("w-full h-full") as tabs:
        live_tab = ui.tab("Live")
        tasklog_tab = ui.tab("Task Log")
        stream_tab = ui.tab("Worker Task Stream")
        worker_processors_tab = ui.tab("Worker Processors")
        settings_tab = ui.tab("Settings")

    # Tab contents; refreshable sections get a timer that re-draws them periodically.
    with ui.tab_panels(tabs, value=live_tab).classes("w-full"):
        with ui.tab_panel(live_tab):
            sections.scheduler_section.draw_section()
            sections.workers_section.draw_section()  # type: ignore[call-arg]

        with ui.tab_panel(tasklog_tab):
            sections.tasklog_section.draw_section()  # type: ignore[call-arg]
            ui.timer(TASK_LOG_REFRESH_INTERVAL, sections.tasklog_section.draw_section.refresh, active=True)

        with ui.tab_panel(stream_tab):
            sections.task_stream_section.setup_task_stream(sections.settings_section)
            ui.timer(TASK_STREAM_UPDATE_INTERVAL, sections.task_stream_section.update_plot, active=True)

            sections.memory_usage_section.setup_memory_chart(sections.settings_section)
            ui.timer(MEMORY_USAGE_UPDATE_INTERVAL, sections.memory_usage_section.update_plot, active=True)

        with ui.tab_panel(worker_processors_tab):
            sections.worker_processors.draw_section()  # type: ignore[call-arg]
            ui.timer(
                WORKER_PROCESSORS_REFRESH_INTERVAL, sections.worker_processors.draw_section.refresh, active=True
            )

        with ui.tab_panel(settings_tab):
            sections.settings_section.draw_section()

    # Every received status message is routed to the sections through __show_status.
    status_subscriber = ZMQSyncSubscriber(
        address=ZMQConfig.from_string(address),
        callback=partial(__show_status, tables=sections),
        topic=b"",
        timeout_seconds=-1,
    )
    status_subscriber.start()

    # Run the UI server in its own non-daemon thread and wait for it to finish.
    serve_ui = partial(ui.run, host=host, port=port, reload=False)
    ui_thread = threading.Thread(target=serve_ui, daemon=False)
    ui_thread.start()
    ui_thread.join()
101
+ ui_thread.join()
102
+
103
+
104
def __show_status(status: Message, tables: Sections):
    """Route one status message to the sections according to its concrete type.

    Scheduler snapshots update the scheduler/worker summaries; worker and task state messages
    are offered to every section in a fixed order; balance advice is only logged.
    """
    if isinstance(status, StateScheduler):
        __update_scheduler_state(status, tables)
        return

    # Fan-out order for worker/task state messages.
    receivers = (
        tables.scheduler_section,
        tables.workers_section,
        tables.task_stream_section,
        tables.memory_usage_section,
        tables.tasklog_section,
        tables.worker_processors,
        tables.settings_section,
    )

    if isinstance(status, StateWorker):
        logging.info(f"Received StateWorker update for worker {status.worker_id.decode()} with {status.state.name}")
        for receiver in receivers:
            receiver.handle_worker_state(status)
        return

    if isinstance(status, StateTask):
        logging.debug(f"Received StateTask update for task {status.task_id.hex()} with {status.state.name}")
        for receiver in receivers:
            receiver.handle_task_state(status)
        return

    if isinstance(status, StateBalanceAdvice):
        logging.debug(f"Received StateBalanceAdvice for {status.worker_id.decode()} with {len(status.task_ids)} tasks")
        return

    logging.info(f"Unhandled message received: {type(status)}")
136
+
137
+
138
def __update_scheduler_state(data: StateScheduler, tables: Sections):
    """Copy a scheduler snapshot into the scheduler, workers and worker-processor sections."""
    scheduler_view = tables.scheduler_section
    scheduler_view.cpu = format_percentage(data.scheduler.cpu)
    scheduler_view.rss = format_bytes(data.scheduler.rss)
    scheduler_view.rss_free = format_bytes(data.rss_free)

    worker_statuses = data.worker_manager.workers
    for worker_status in worker_statuses:
        # Rows are looked up by the decoded worker id.
        row_key = worker_status.worker_id.decode()
        tables.workers_section.workers[row_key].populate(worker_status)

    tables.worker_processors.update_data(worker_statuses)
@@ -0,0 +1,104 @@
1
+ import dataclasses
2
+ from threading import Lock
3
+ from typing import Dict, List, Optional
4
+
5
+ from nicegui import ui
6
+ from nicegui.element import Element
7
+
8
+ from scaler.protocol.python.common import WorkerState
9
+ from scaler.protocol.python.message import StateTask, StateWorker
10
+ from scaler.protocol.python.status import ProcessorStatus, WorkerStatus
11
+ from scaler.ui.utility import format_worker_name
12
+
13
+
14
@dataclasses.dataclass
class WorkerProcessors:
    """Per-worker processor tables, keyed by worker name, guarded by a lock."""

    workers: Dict[str, "WorkerProcessorTable"] = dataclasses.field(default_factory=dict)
    # A plain ``Lock()`` default is created once at class definition and shared by every
    # instance; ``default_factory`` gives each instance its own lock.
    _lock: Lock = dataclasses.field(default_factory=Lock)

    @ui.refreshable
    def draw_section(self):
        """Draw one table per known worker."""
        with self._lock:
            for processor_table in self.workers.values():
                processor_table.draw_table()

    def update_data(self, data: List[WorkerStatus]):
        """Create or refresh the table of every worker listed in ``data``.

        :param data: worker statuses; each provides ``worker_id``, ``rss_free`` and
            ``processor_statuses``.
        """
        with self._lock:
            for worker in data:
                worker_name = worker.worker_id.decode()
                processor_table = self.workers.get(worker_name)

                if processor_table is None:
                    self.workers[worker_name] = WorkerProcessorTable(
                        worker_name, 0, worker.rss_free, worker.processor_statuses
                    )
                    continue

                # Keep the free-memory figure current; it was previously frozen at the value
                # seen when the table was first created.
                processor_table.rss_free = worker.rss_free
                if processor_table.processor_statuses != worker.processor_statuses:
                    processor_table.processor_statuses = worker.processor_statuses

    def remove_worker(self, dead_worker: str):
        """Forget the table of ``dead_worker``; unknown names are ignored."""
        with self._lock:
            self.workers.pop(dead_worker, None)

    def handle_task_state(self, _: StateTask):
        """Task state messages are ignored by this section."""
        return

    def handle_worker_state(self, state_worker: StateWorker):
        """Drop the worker's table when it reports ``WorkerState.Disconnected``."""
        worker_id = state_worker.worker_id.decode()
        state = state_worker.state

        if state == WorkerState.Disconnected:
            self.remove_worker(worker_id)
50
+
51
+
52
@dataclasses.dataclass
class WorkerProcessorTable:
    """Card showing one row of gauges and flags per processor of a single worker."""

    worker_name: str
    rss_max: int  # largest processor RSS seen so far while drawing
    rss_free: int
    processor_statuses: List[ProcessorStatus]

    handler: Optional[Element] = None  # card element created by the latest draw_table call

    def draw_table(self):
        """Draw the worker card: a title followed by a seven-column grid of processor rows."""
        title = format_worker_name(self.worker_name)
        with ui.card().classes("w-full") as card:
            self.handler = card

            ui.markdown(f"Worker **{title}**").classes("text-xl")

            with ui.grid(columns=7).classes("w-full"):
                self.draw_titles()
                for status in sorted(self.processor_statuses, key=lambda entry: entry.pid):
                    # Track the running maximum before drawing so the row shows it.
                    self.rss_max = max(self.rss_max, status.resource.rss)
                    self.draw_row(status, self.rss_free, self.rss_max)

    @staticmethod
    def draw_titles():
        """Emit the header labels, one per column."""
        for title in (
            "Processor PID",
            "CPU %",
            "RSS (in MB)",
            "Max RSS (in MB)",
            "Initialized",
            "Has Task",
            "Suspended",
        ):
            ui.label(title)

    @staticmethod
    def draw_row(processor_status: ProcessorStatus, rss_free: int, rss_max: int):
        """Emit the seven UI elements for one processor.

        :param processor_status: source of pid, cpu, rss and the three boolean flags.
        :param rss_free: free memory in bytes; with the current RSS it sets the gauges' upper bound.
        :param rss_max: peak RSS in bytes shown in the "Max RSS" gauge.
        """
        cpu_value = processor_status.resource.cpu / 10
        # Byte counts are scaled by 1e6 for display.
        rss_mb = int(processor_status.resource.rss / 1e6)
        peak_mb = int(rss_max / 1e6)
        free_mb = int(rss_free / 1e6)
        gauge_limit = rss_mb + free_mb

        ui.label(str(processor_status.pid))
        ui.knob(value=cpu_value, track_color="grey-2", show_value=True, min=0, max=100)
        ui.knob(value=rss_mb, track_color="grey-2", show_value=True, min=0, max=gauge_limit)
        ui.knob(value=peak_mb, track_color="grey-2", show_value=True, min=0, max=gauge_limit)
        for flag in ("initialized", "has_task", "suspended"):
            ui.checkbox().bind_value_from(processor_status, flag)

    def delete_row(self):
        """Clear and delete the card created by ``draw_table``; it must have been drawn first."""
        assert self.handler is not None
        self.handler.clear()
        self.handler.delete()
File without changes
@@ -0,0 +1,19 @@
1
+ import functools
2
+ import pdb
3
+ import sys
4
+ from typing import Callable
5
+
6
+
7
def pdb_wrapped(func: Callable):
    """Decorate an entry point so it exits with its return value and drops into pdb on errors.

    The wrapped callable never returns normally: its result is passed to ``sys.exit``. If it
    raises an ``Exception``, a post-mortem debugger session is opened on the traceback and the
    exception is then re-raised.
    """

    @functools.wraps(func)
    def pdb_wrapper(*args, **kwargs):
        try:
            sys.exit(func(*args, **kwargs))
        except Exception as error:
            # SystemExit is not an Exception subclass, so a normal exit bypasses the debugger.
            pdb.post_mortem(error.__traceback__)
            raise

    return pdb_wrapper
@@ -0,0 +1,63 @@
1
+ import collections
2
+ from typing import Callable
3
+
4
+
5
class EventList(collections.UserList):
    """A list that emits events when it is modified.

    Callbacks registered with :meth:`add_update_callback` are invoked with the list itself
    after each mutating operation.
    """

    def __init__(self, initlist=None):
        super().__init__(initlist=initlist)
        self._callbacks = []

    def add_update_callback(self, callback: Callable[["EventList"], None]):
        """Register ``callback`` to be called with this list after every modification."""
        self._callbacks.append(callback)

    def __setitem__(self, i, item):
        super().__setitem__(i, item)
        self._list_updated()

    def __delitem__(self, i):
        super().__delitem__(i)
        self._list_updated()

    def __add__(self, other):
        # ``a + b`` must evaluate to the concatenated list; the result used to be dropped,
        # making the expression evaluate to None.
        result = super().__add__(other)
        # Callbacks still fire here, as they always have for this operator.
        self._list_updated()
        return result

    def __iadd__(self, other):
        super().__iadd__(other)
        self._list_updated()
        return self

    def __imul__(self, n):
        super().__imul__(n)
        self._list_updated()
        return self

    def append(self, item):
        super().append(item)
        self._list_updated()

    def insert(self, i, item):
        super().insert(i, item)
        self._list_updated()

    def pop(self, i: int = -1):
        v = super().pop(i)
        self._list_updated()
        return v

    def remove(self, item):
        super().remove(item)
        self._list_updated()

    def clear(self) -> None:
        super().clear()
        self._list_updated()

    def sort(self, /, *args, **kwargs):
        super().sort(*args, **kwargs)
        self._list_updated()

    def reverse(self) -> None:
        super().reverse()
        self._list_updated()

    def extend(self, other) -> None:
        super().extend(other)
        self._list_updated()

    def _list_updated(self):
        """Invoke every registered callback, in registration order, with this list."""
        for callback in self._callbacks:
            callback(self)
@@ -0,0 +1,58 @@
1
+ import asyncio
2
+ import enum
3
+ import logging
4
+ from typing import Awaitable, Callable
5
+
6
+
7
class EventLoopType(enum.Enum):
    """Event loop implementations that can be selected by name."""

    builtin = enum.auto()
    uvloop = enum.auto()

    @staticmethod
    def allowed_types():
        """Return the set of member names accepted as an event loop type."""
        return set(EventLoopType.__members__)
14
+
15
+
16
def register_event_loop(event_loop_type: str):
    """Select the event loop implementation named by ``event_loop_type``.

    For ``"uvloop"`` the uvloop package is imported and installed; for the other allowed
    name nothing is installed.

    :param event_loop_type: one of ``EventLoopType.allowed_types()``.
    :raises TypeError: if the name is not an allowed type.
    :raises ImportError: if uvloop is requested but cannot be imported.
    """
    if event_loop_type not in EventLoopType.allowed_types():
        raise TypeError(f"allowed event loop types are: {EventLoopType.allowed_types()}")

    if EventLoopType[event_loop_type] == EventLoopType.uvloop:
        try:
            import uvloop  # noqa
        except ImportError as error:
            # Chain the original failure so the real import problem stays visible.
            raise ImportError("please use pip install uvloop if try to use uvloop as event loop") from error

        uvloop.install()

    logging.info(f"use event loop: {event_loop_type}")
32
+
33
+
34
def create_async_loop_routine(routine: Callable[[], Awaitable], seconds: int):
    """create async loop routine,

    - if seconds is negative, means disable
    - 0 means looping without any wait, as fast as possible
    - positive number means execute routine every positive seconds, if passing 1 means run once every 1 seconds

    :param routine: zero-argument callable returning an awaitable; it is awaited on each iteration.
    :param seconds: pause between iterations, see above.
    :return: a coroutine object that runs the loop until it is cancelled or interrupted.
    """
    # Name used in the log lines: the owning class for bound methods (as before), otherwise
    # the callable's own qualified name. Plain functions used to fail with AttributeError
    # because they have no ``__self__``.
    owner = getattr(routine, "__self__", None)
    if owner is not None:
        routine_name = owner.__class__.__name__
    else:
        routine_name = getattr(routine, "__qualname__", repr(routine))

    async def loop():
        if seconds < 0:
            logging.info(f"{routine_name}: disabled")
            return

        logging.info(f"{routine_name}: started")
        try:
            while True:
                await routine()
                await asyncio.sleep(seconds)
        except asyncio.CancelledError:
            pass
        except KeyboardInterrupt:
            pass

        logging.info(f"{routine_name}: exited")

    return loop()
@@ -0,0 +1,42 @@
1
class TaskNotFoundError(Exception):
    """Error type for a task that could not be found."""


class WorkerDiedError(Exception):
    """Error type for a worker that died."""


class NoWorkerError(Exception):
    """Error type for the absence of any worker."""


class DisconnectedError(Exception):
    """Error type for a disconnection."""


class ProcessorDiedError(Exception):
    """Error type for a processor that died."""


class DeserializeObjectError(Exception):
    """Error type for a failure to deserialize an object."""


class MissingObjects(Exception):
    """Error type for objects that are missing."""


class ClientCancelledException(Exception):
    """Error type for a client-side cancellation."""


class ClientShutdownException(Exception):
    """Error type for a client shutdown."""


class ClientQuitException(Exception):
    """Error type for a client quitting."""


class ObjectStorageException(Exception):
    """Error type for object storage failures."""
@@ -0,0 +1,44 @@
1
STORAGE_SIZE_MODULUS = 1024.0
TIME_MODULUS = 1000


def format_bytes(number) -> str:
    """Format a byte count with the largest fitting unit among B, K, M, G and T.

    B and K values are shown as whole numbers, larger units with one decimal. Values beyond
    the terabyte range are still expressed in T instead of raising.

    :param number: size in bytes.
    """
    for unit in ("B", "K", "M", "G"):
        if number < STORAGE_SIZE_MODULUS:
            if unit in {"B", "K"}:
                return f"{int(number)}{unit}"
            return f"{number:.1f}{unit}"

        number /= STORAGE_SIZE_MODULUS

    # T is the last unit: never divide past it, so huge values stay representable.
    return f"{number:.1f}T"
17
+
18
+
19
def format_integer(number):
    """Render ``number`` with "," as the thousands separator."""
    return format(number, ",")
21
+
22
+
23
def format_percentage(number: int):
    """Render a per-mille value (1000 == 100%) as a percentage with one decimal."""
    fraction = number / 1000
    return "{:.1%}".format(fraction)
25
+
26
+
27
def format_microseconds(number: int):
    """Format a duration given in microseconds using ms or s.

    - below one millisecond: fractional milliseconds, e.g. ``500`` -> ``"0.5ms"``
    - below one second: whole milliseconds
    - otherwise: whole seconds, with a "+" marker once the value exceeds ``TIME_MODULUS`` seconds

    :param number: duration in microseconds.
    """
    if number < TIME_MODULUS:
        return f"{number/TIME_MODULUS:.1f}ms"

    milliseconds = int(number / TIME_MODULUS)
    if milliseconds < TIME_MODULUS:
        return f"{milliseconds}ms"

    # Seconds is the last unit, so stop scaling here. The old loop divided once more and
    # then fell off its end, returning None for anything of 1000 seconds or longer.
    seconds = int(milliseconds / TIME_MODULUS)
    too_big_sign = "+" if seconds > TIME_MODULUS else ""
    return f"{seconds}{too_big_sign}s"
38
+
39
+
40
def format_seconds(number: int):
    """Render a number of seconds, capping the display at "60+s" for anything above 60."""
    return "60+s" if number > 60 else f"{number}s"
File without changes
@@ -0,0 +1,27 @@
1
+ from collections import deque
2
+ from typing import Any, Callable, Dict, List, Tuple, Union
3
+
4
+
5
def cull_graph(
    graph: Dict[str, Tuple[Union[Callable, Any], ...]], keys: List[str]
) -> Dict[str, Tuple[Union[Callable, Any], ...]]:
    """Return the sub-graph containing ``keys`` and everything they transitively depend on.

    A node counts as a task when its value is a non-empty tuple whose first item is callable;
    the remaining tuple items are followed as keys of its dependencies. Any other value is
    kept as a leaf.

    :param graph: mapping from node key to node value.
    :param keys: keys whose results are wanted.
    :return: a new dict restricted to the reachable keys.
    """
    reachable = set(keys)
    pending = deque(keys)

    while pending:
        node = graph[pending.popleft()]

        is_task = isinstance(node, tuple) and bool(node) and callable(node[0])
        if not is_task:
            continue

        for dependency in set(node[1:]):
            if dependency not in reachable:
                reachable.add(dependency)
                pending.append(dependency)

    return {key: graph[key] for key in reachable}