opengris-scaler 1.12.7__cp311-cp311-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of opengris-scaler might be problematic. Click here for more details.

Files changed (234) hide show
  1. opengris_scaler-1.12.7.dist-info/METADATA +729 -0
  2. opengris_scaler-1.12.7.dist-info/RECORD +234 -0
  3. opengris_scaler-1.12.7.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.7.dist-info/entry_points.txt +9 -0
  5. opengris_scaler-1.12.7.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.7.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.7.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-61c06778.1.0.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-21b63b70.1.0.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/CMakeLists.txt +11 -0
  13. scaler/__init__.py +14 -0
  14. scaler/about.py +5 -0
  15. scaler/client/__init__.py +0 -0
  16. scaler/client/agent/__init__.py +0 -0
  17. scaler/client/agent/client_agent.py +210 -0
  18. scaler/client/agent/disconnect_manager.py +27 -0
  19. scaler/client/agent/future_manager.py +112 -0
  20. scaler/client/agent/heartbeat_manager.py +74 -0
  21. scaler/client/agent/mixins.py +89 -0
  22. scaler/client/agent/object_manager.py +98 -0
  23. scaler/client/agent/task_manager.py +64 -0
  24. scaler/client/client.py +635 -0
  25. scaler/client/future.py +252 -0
  26. scaler/client/object_buffer.py +129 -0
  27. scaler/client/object_reference.py +25 -0
  28. scaler/client/serializer/__init__.py +0 -0
  29. scaler/client/serializer/default.py +16 -0
  30. scaler/client/serializer/mixins.py +38 -0
  31. scaler/cluster/__init__.py +0 -0
  32. scaler/cluster/cluster.py +115 -0
  33. scaler/cluster/combo.py +148 -0
  34. scaler/cluster/object_storage_server.py +45 -0
  35. scaler/cluster/scheduler.py +83 -0
  36. scaler/config/__init__.py +0 -0
  37. scaler/config/defaults.py +87 -0
  38. scaler/config/loader.py +95 -0
  39. scaler/config/mixins.py +15 -0
  40. scaler/config/section/__init__.py +0 -0
  41. scaler/config/section/cluster.py +56 -0
  42. scaler/config/section/native_worker_adapter.py +44 -0
  43. scaler/config/section/object_storage_server.py +7 -0
  44. scaler/config/section/scheduler.py +53 -0
  45. scaler/config/section/symphony_worker_adapter.py +47 -0
  46. scaler/config/section/top.py +13 -0
  47. scaler/config/section/webui.py +16 -0
  48. scaler/config/types/__init__.py +0 -0
  49. scaler/config/types/object_storage_server.py +45 -0
  50. scaler/config/types/worker.py +57 -0
  51. scaler/config/types/zmq.py +79 -0
  52. scaler/entry_points/__init__.py +0 -0
  53. scaler/entry_points/cluster.py +133 -0
  54. scaler/entry_points/object_storage_server.py +41 -0
  55. scaler/entry_points/scheduler.py +135 -0
  56. scaler/entry_points/top.py +286 -0
  57. scaler/entry_points/webui.py +26 -0
  58. scaler/entry_points/worker_adapter_native.py +137 -0
  59. scaler/entry_points/worker_adapter_symphony.py +102 -0
  60. scaler/io/__init__.py +0 -0
  61. scaler/io/async_binder.py +85 -0
  62. scaler/io/async_connector.py +95 -0
  63. scaler/io/async_object_storage_connector.py +185 -0
  64. scaler/io/mixins.py +154 -0
  65. scaler/io/sync_connector.py +68 -0
  66. scaler/io/sync_object_storage_connector.py +185 -0
  67. scaler/io/sync_subscriber.py +83 -0
  68. scaler/io/utility.py +31 -0
  69. scaler/io/ymq/CMakeLists.txt +98 -0
  70. scaler/io/ymq/__init__.py +0 -0
  71. scaler/io/ymq/_ymq.pyi +96 -0
  72. scaler/io/ymq/_ymq.so +0 -0
  73. scaler/io/ymq/bytes.h +114 -0
  74. scaler/io/ymq/common.h +29 -0
  75. scaler/io/ymq/configuration.h +60 -0
  76. scaler/io/ymq/epoll_context.cpp +185 -0
  77. scaler/io/ymq/epoll_context.h +85 -0
  78. scaler/io/ymq/error.h +132 -0
  79. scaler/io/ymq/event_loop.h +55 -0
  80. scaler/io/ymq/event_loop_thread.cpp +64 -0
  81. scaler/io/ymq/event_loop_thread.h +46 -0
  82. scaler/io/ymq/event_manager.h +81 -0
  83. scaler/io/ymq/file_descriptor.h +203 -0
  84. scaler/io/ymq/interruptive_concurrent_queue.h +169 -0
  85. scaler/io/ymq/io_context.cpp +98 -0
  86. scaler/io/ymq/io_context.h +44 -0
  87. scaler/io/ymq/io_socket.cpp +299 -0
  88. scaler/io/ymq/io_socket.h +121 -0
  89. scaler/io/ymq/iocp_context.cpp +102 -0
  90. scaler/io/ymq/iocp_context.h +83 -0
  91. scaler/io/ymq/logging.h +163 -0
  92. scaler/io/ymq/message.h +15 -0
  93. scaler/io/ymq/message_connection.h +16 -0
  94. scaler/io/ymq/message_connection_tcp.cpp +672 -0
  95. scaler/io/ymq/message_connection_tcp.h +96 -0
  96. scaler/io/ymq/network_utils.h +179 -0
  97. scaler/io/ymq/pymod_ymq/bytes.h +113 -0
  98. scaler/io/ymq/pymod_ymq/exception.h +124 -0
  99. scaler/io/ymq/pymod_ymq/gil.h +15 -0
  100. scaler/io/ymq/pymod_ymq/io_context.h +166 -0
  101. scaler/io/ymq/pymod_ymq/io_socket.h +285 -0
  102. scaler/io/ymq/pymod_ymq/message.h +99 -0
  103. scaler/io/ymq/pymod_ymq/python.h +153 -0
  104. scaler/io/ymq/pymod_ymq/ymq.cpp +23 -0
  105. scaler/io/ymq/pymod_ymq/ymq.h +357 -0
  106. scaler/io/ymq/readme.md +114 -0
  107. scaler/io/ymq/simple_interface.cpp +80 -0
  108. scaler/io/ymq/simple_interface.h +24 -0
  109. scaler/io/ymq/tcp_client.cpp +367 -0
  110. scaler/io/ymq/tcp_client.h +75 -0
  111. scaler/io/ymq/tcp_operations.h +41 -0
  112. scaler/io/ymq/tcp_server.cpp +410 -0
  113. scaler/io/ymq/tcp_server.h +79 -0
  114. scaler/io/ymq/third_party/concurrentqueue.h +3747 -0
  115. scaler/io/ymq/timed_queue.h +272 -0
  116. scaler/io/ymq/timestamp.h +102 -0
  117. scaler/io/ymq/typedefs.h +20 -0
  118. scaler/io/ymq/utils.h +34 -0
  119. scaler/io/ymq/ymq.py +130 -0
  120. scaler/object_storage/CMakeLists.txt +50 -0
  121. scaler/object_storage/__init__.py +0 -0
  122. scaler/object_storage/constants.h +11 -0
  123. scaler/object_storage/defs.h +14 -0
  124. scaler/object_storage/io_helper.cpp +44 -0
  125. scaler/object_storage/io_helper.h +9 -0
  126. scaler/object_storage/message.cpp +56 -0
  127. scaler/object_storage/message.h +130 -0
  128. scaler/object_storage/object_manager.cpp +126 -0
  129. scaler/object_storage/object_manager.h +52 -0
  130. scaler/object_storage/object_storage_server.cpp +359 -0
  131. scaler/object_storage/object_storage_server.h +126 -0
  132. scaler/object_storage/object_storage_server.so +0 -0
  133. scaler/object_storage/pymod_object_storage_server.cpp +104 -0
  134. scaler/protocol/__init__.py +0 -0
  135. scaler/protocol/capnp/__init__.py +0 -0
  136. scaler/protocol/capnp/_python.py +6 -0
  137. scaler/protocol/capnp/common.capnp +63 -0
  138. scaler/protocol/capnp/message.capnp +216 -0
  139. scaler/protocol/capnp/object_storage.capnp +52 -0
  140. scaler/protocol/capnp/status.capnp +73 -0
  141. scaler/protocol/introduction.md +105 -0
  142. scaler/protocol/python/__init__.py +0 -0
  143. scaler/protocol/python/common.py +135 -0
  144. scaler/protocol/python/message.py +726 -0
  145. scaler/protocol/python/mixins.py +13 -0
  146. scaler/protocol/python/object_storage.py +118 -0
  147. scaler/protocol/python/status.py +279 -0
  148. scaler/protocol/worker.md +228 -0
  149. scaler/scheduler/__init__.py +0 -0
  150. scaler/scheduler/allocate_policy/__init__.py +0 -0
  151. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  152. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  153. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  154. scaler/scheduler/allocate_policy/mixins.py +55 -0
  155. scaler/scheduler/controllers/__init__.py +0 -0
  156. scaler/scheduler/controllers/balance_controller.py +65 -0
  157. scaler/scheduler/controllers/client_controller.py +131 -0
  158. scaler/scheduler/controllers/config_controller.py +31 -0
  159. scaler/scheduler/controllers/graph_controller.py +424 -0
  160. scaler/scheduler/controllers/information_controller.py +81 -0
  161. scaler/scheduler/controllers/mixins.py +201 -0
  162. scaler/scheduler/controllers/object_controller.py +147 -0
  163. scaler/scheduler/controllers/scaling_controller.py +86 -0
  164. scaler/scheduler/controllers/task_controller.py +373 -0
  165. scaler/scheduler/controllers/worker_controller.py +168 -0
  166. scaler/scheduler/object_usage/__init__.py +0 -0
  167. scaler/scheduler/object_usage/object_tracker.py +131 -0
  168. scaler/scheduler/scheduler.py +253 -0
  169. scaler/scheduler/task/__init__.py +0 -0
  170. scaler/scheduler/task/task_state_machine.py +92 -0
  171. scaler/scheduler/task/task_state_manager.py +61 -0
  172. scaler/ui/__init__.py +0 -0
  173. scaler/ui/constants.py +9 -0
  174. scaler/ui/live_display.py +118 -0
  175. scaler/ui/memory_window.py +146 -0
  176. scaler/ui/setting_page.py +47 -0
  177. scaler/ui/task_graph.py +370 -0
  178. scaler/ui/task_log.py +83 -0
  179. scaler/ui/utility.py +35 -0
  180. scaler/ui/webui.py +125 -0
  181. scaler/ui/worker_processors.py +85 -0
  182. scaler/utility/__init__.py +0 -0
  183. scaler/utility/debug.py +19 -0
  184. scaler/utility/event_list.py +63 -0
  185. scaler/utility/event_loop.py +58 -0
  186. scaler/utility/exceptions.py +42 -0
  187. scaler/utility/formatter.py +44 -0
  188. scaler/utility/graph/__init__.py +0 -0
  189. scaler/utility/graph/optimization.py +27 -0
  190. scaler/utility/graph/topological_sorter.py +11 -0
  191. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  192. scaler/utility/identifiers.py +105 -0
  193. scaler/utility/logging/__init__.py +0 -0
  194. scaler/utility/logging/decorators.py +25 -0
  195. scaler/utility/logging/scoped_logger.py +33 -0
  196. scaler/utility/logging/utility.py +183 -0
  197. scaler/utility/many_to_many_dict.py +123 -0
  198. scaler/utility/metadata/__init__.py +0 -0
  199. scaler/utility/metadata/profile_result.py +31 -0
  200. scaler/utility/metadata/task_flags.py +30 -0
  201. scaler/utility/mixins.py +13 -0
  202. scaler/utility/network_util.py +7 -0
  203. scaler/utility/one_to_many_dict.py +72 -0
  204. scaler/utility/queues/__init__.py +0 -0
  205. scaler/utility/queues/async_indexed_queue.py +37 -0
  206. scaler/utility/queues/async_priority_queue.py +70 -0
  207. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  208. scaler/utility/queues/indexed_queue.py +114 -0
  209. scaler/utility/serialization.py +9 -0
  210. scaler/version.txt +1 -0
  211. scaler/worker/__init__.py +0 -0
  212. scaler/worker/agent/__init__.py +0 -0
  213. scaler/worker/agent/heartbeat_manager.py +107 -0
  214. scaler/worker/agent/mixins.py +137 -0
  215. scaler/worker/agent/processor/__init__.py +0 -0
  216. scaler/worker/agent/processor/object_cache.py +107 -0
  217. scaler/worker/agent/processor/processor.py +279 -0
  218. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  219. scaler/worker/agent/processor_holder.py +145 -0
  220. scaler/worker/agent/processor_manager.py +365 -0
  221. scaler/worker/agent/profiling_manager.py +109 -0
  222. scaler/worker/agent/task_manager.py +150 -0
  223. scaler/worker/agent/timeout_manager.py +19 -0
  224. scaler/worker/preload.py +84 -0
  225. scaler/worker/worker.py +264 -0
  226. scaler/worker_adapter/__init__.py +0 -0
  227. scaler/worker_adapter/native.py +154 -0
  228. scaler/worker_adapter/symphony/__init__.py +0 -0
  229. scaler/worker_adapter/symphony/callback.py +45 -0
  230. scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
  231. scaler/worker_adapter/symphony/message.py +24 -0
  232. scaler/worker_adapter/symphony/task_manager.py +288 -0
  233. scaler/worker_adapter/symphony/worker.py +205 -0
  234. scaler/worker_adapter/symphony/worker_adapter.py +142 -0
scaler/ui/setting_page.py ADDED
@@ -0,0 +1,47 @@
1
+ import datetime
2
+
3
+ from nicegui import ui
4
+
5
# Toggle choices for the sliding-window length: timedelta value -> label shown on the toggle.
SLIDING_WINDOW_OPTIONS = {datetime.timedelta(minutes=minutes): str(minutes) for minutes in (5, 10, 30)}

# Default memory store time: the longest span offered by the sliding-window toggle.
MEMORY_STORE_TIME = max(SLIDING_WINDOW_OPTIONS)

# Toggle choices for the memory usage axis scale (value and label are the same string).
MEMORY_USAGE_SCALE_OPTIONS = {scale: scale for scale in ("log", "linear")}

# Toggle choices for the worker retention time: timedelta value -> label shown on the toggle.
WORKER_RETENTION_TIME_OPTIONS = {datetime.timedelta(minutes=minutes): str(minutes) for minutes in (0, 1, 5)}
19
+
20
+
21
class Settings:
    """Display settings for the web UI, each one editable through a toggle."""

    def __init__(self):
        # Defaults: 5 minute window, longest store time, log scale, no worker retention.
        self.stream_window = datetime.timedelta(minutes=5)
        self.memory_store_time = MEMORY_STORE_TIME
        self.memory_usage_scale = "log"
        self.worker_retention_time = datetime.timedelta(minutes=0)

    def draw_section(self):
        """Draw one card per setting: a centered label above a toggle bound to the attribute."""
        # (card title, toggle options, name of the attribute the toggle is bound to)
        controls = (
            ("Sliding Window Length", SLIDING_WINDOW_OPTIONS, "stream_window"),
            ("Memory Store Time", SLIDING_WINDOW_OPTIONS, "memory_store_time"),
            ("Memory Usage Scale", MEMORY_USAGE_SCALE_OPTIONS, "memory_usage_scale"),
            ("Worker Retention Time", WORKER_RETENTION_TIME_OPTIONS, "worker_retention_time"),
        )
        for title, options, attribute in controls:
            with ui.card().classes("w-fit").classes("q-mx-auto"):
                ui.label(title).classes("q-mx-auto")
                ui.toggle(options).bind_value(self, attribute)

    @staticmethod
    def max_window_size() -> datetime.timedelta:
        """Return the longest sliding window the UI offers."""
        return max(SLIDING_WINDOW_OPTIONS)
scaler/ui/task_graph.py ADDED
@@ -0,0 +1,370 @@
1
+ import datetime
2
+ from collections import deque
3
+ from queue import SimpleQueue
4
+ from threading import Lock
5
+ from typing import Deque, Dict, List, Optional, Set, Tuple
6
+
7
+ from nicegui import ui
8
+
9
+ from scaler.protocol.python.common import TaskState
10
+ from scaler.protocol.python.message import StateTask
11
+ from scaler.ui.live_display import WorkersSection
12
+ from scaler.ui.setting_page import Settings
13
+ from scaler.ui.utility import format_timediff, format_worker_name, get_bounds, make_tick_text, make_ticks
14
+
15
+
16
class TaskColors:
    """Bar colors used by the task stream, plus the mapping from task state to color."""

    RUNNING = "yellow"
    NO_WORK = "white"
    SUCCESS = "green"
    FAILED = "red"
    INACTIVE = "lightgray"
    CANCELED = "black"
    CANCELING = CANCELED

    # Failure and not-found states are mapped too, so looking them up no longer raises KeyError.
    __task_status_to_color = {
        TaskState.Inactive: INACTIVE,
        TaskState.Running: RUNNING,
        TaskState.Success: SUCCESS,
        TaskState.Failed: FAILED,
        TaskState.FailedWorkerDied: FAILED,
        TaskState.Canceled: CANCELED,
        TaskState.CanceledNotFound: CANCELED,
        TaskState.Canceling: CANCELING,
    }

    @staticmethod
    def from_status(status: TaskState) -> str:
        """Return the bar color for `status`.

        Raises:
            KeyError: if `status` has no color assigned in the mapping above.
        """
        return TaskColors.__task_status_to_color[status]
36
+
37
+
38
class TaskStream:
    """Per-worker timeline of task activity, drawn as a stacked horizontal Plotly bar chart.

    Each worker owns one "History" trace in `_completed_data_cache`. A trace is a set of
    parallel lists (`y`, `x`, `marker.color`, `hovertext`, `hovertemplate`) with one entry per
    bar; `x` holds bar widths in seconds, stacked from `_start_time`. Tasks still running are
    drawn from `_current_tasks` as a separate "Working" trace.
    """

    def __init__(self):
        self._figure = {}
        self._plot = None

        self._settings: Optional[Settings] = None

        # The time axis origin is placed 30 minutes before construction.
        self._start_time = datetime.datetime.now() - datetime.timedelta(minutes=30)
        self._last_task_tick = datetime.datetime.now()

        # worker -> (has running tasks, ids of running tasks, start of the current bar)
        self._current_tasks: Dict[str, Tuple[bool, Set[bytes], Optional[datetime.datetime]]] = {}
        # worker -> Plotly bar trace holding that worker's finished bars
        self._completed_data_cache: Dict[str, Dict] = {}

        self._worker_to_object_name: Dict[str, str] = {}
        self._worker_last_update: Dict[str, datetime.datetime] = {}
        self._task_id_to_worker: Dict[bytes, str] = {}

        self._seen_workers: Set[str] = set()
        self._lost_workers_queue: SimpleQueue[Tuple[datetime.datetime, str]] = SimpleQueue()

        self._data_update_lock = Lock()
        self._busy_workers: Set[str] = set()
        self._busy_workers_update_time: datetime.datetime = datetime.datetime.now()

        self._dead_workers: Deque[Tuple[datetime.datetime, str]] = deque()  # type: ignore[misc]

    def setup_task_stream(self, settings: Settings):
        """Create the Plotly figure inside a card and remember `settings` for later updates.

        The history cache is reset to empty here.
        """
        with ui.card().classes("w-full").style("height: 85vh"):
            fig = {
                "data": [],
                "layout": {
                    "barmode": "stack",
                    "autosize": True,
                    "margin": {"l": 163},
                    "xaxis": {
                        "autorange": False,
                        "range": [0, 300],
                        "showgrid": False,
                        "tickmode": "array",
                        "tickvals": [0, 50, 100, 150, 200, 250, 300],
                        "ticktext": [-300, -250, -200, -150, -100, -50, 0],
                        "zeroline": False,
                    },
                    "yaxis": {
                        "autorange": True,
                        "automargin": True,
                        "rangemode": "nonnegative",
                        "showgrid": False,
                        "type": "category",
                    },
                },
            }
            self._figure = fig
            self._completed_data_cache = {}
            self._plot = ui.plotly(self._figure).classes("w-full h-full")
            self._settings = settings

    def __setup_worker_cache(self, worker: str):
        """Create an empty history trace for `worker` unless one already exists."""
        if worker in self._completed_data_cache:
            return
        self._completed_data_cache[worker] = {
            "type": "bar",
            "name": "History",
            "y": [],
            "x": [],
            "orientation": "h",
            "marker": {"color": [], "width": 5},
            "hovertemplate": [],
            "hovertext": [],
            "showlegend": False,
        }

    def __get_history_fields(self, worker: str, index: int) -> Tuple[float, str, str]:
        """Return (width in seconds, color, hover text) of the bar at `index` for `worker`."""
        worker_data = self._completed_data_cache[worker]
        time_taken = worker_data["x"][index]
        color = worker_data["marker"]["color"][index]
        text = worker_data["hovertext"][index]
        return time_taken, color, text

    def __remove_last_elements(self, worker: str):
        """Drop the newest bar of `worker` from every parallel list of its trace."""
        worker_data = self._completed_data_cache[worker]
        del worker_data["y"][-1]
        del worker_data["x"][-1]
        del worker_data["marker"]["color"][-1]
        del worker_data["hovertext"][-1]
        del worker_data["hovertemplate"][-1]

    def __add_bar(self, worker: str, time_taken: float, task_color: str, hovertext: str):
        """Append a bar to the history of `worker`, merging it into earlier bars when possible."""
        worker_history = self._completed_data_cache[worker]
        if len(worker_history["y"]) > 1:
            last_time_taken, last_color, last_text = self.__get_history_fields(worker, -1)

            # lengthen last bar if they're the same type
            if last_color == task_color and last_text == hovertext:
                worker_history["x"][-1] += time_taken
                return

            # if there's a short gap from last task to current task, merge the bars
            # this serves two purposes:
            # - get a clean bar instead of many ~0 width lines
            # - more importantly, make the ui significantly more responsive
            if task_color != TaskColors.NO_WORK and len(worker_history["y"]) > 2:
                _, penult_color, penult_text = self.__get_history_fields(worker, -2)

                if last_time_taken < 0.1 and penult_color == task_color and penult_text == hovertext:
                    worker_history["x"][-2] += time_taken + last_time_taken
                    self.__remove_last_elements(worker)
                    return

        worker_history["y"].append(format_worker_name(worker))
        worker_history["x"].append(time_taken)
        worker_history["marker"]["color"].append(task_color)
        worker_history["hovertext"].append(hovertext)

        if hovertext:
            worker_history["hovertemplate"].append("%{hovertext} (%{x})")
        else:
            worker_history["hovertemplate"].append("")

    def __remove_old_tasks_from_cache(self, worker: str, cutoff_index: int):
        """Collapse the oldest `cutoff_index` bars of `worker` into a single idle spacer bar.

        The spacer is as wide as the bars it replaces, so the remaining bars keep their position
        on the time axis. All five parallel lists are updated together so they stay the same length.
        """
        if cutoff_index <= 0:
            return

        worker_data = self._completed_data_cache[worker]
        spacer_width = sum(worker_data["x"][:cutoff_index])

        worker_data["y"][:cutoff_index] = [format_worker_name(worker)]
        worker_data["x"][:cutoff_index] = [spacer_width]
        worker_data["marker"]["color"][:cutoff_index] = [TaskColors.NO_WORK]
        worker_data["hovertext"][:cutoff_index] = [""]
        worker_data["hovertemplate"][:cutoff_index] = [""]

    def __handle_task_result(self, state: StateTask, now: datetime.datetime):
        """Close the running bar of the worker that owned the task and record it in the history."""
        # Pop rather than get: the mapping is only needed until the result arrives. Keeping it
        # would grow the dict for every task ever seen, and a repeated result for the same task
        # would be processed twice.
        worker = self._task_id_to_worker.pop(state.task_id, "")
        if worker == "":
            return

        task_state = state.state
        self._worker_last_update[worker] = now

        _, _, start = self._current_tasks.get(worker, (False, set(), None))

        if start is None:
            # we don't know when this task started, so just ignore
            return

        with self._data_update_lock:
            self.__remove_task_from_worker(worker=worker, task_id=state.task_id, now=now, force_new_time=True)
            self.__add_bar(
                worker,
                format_timediff(start, now),
                TaskColors.from_status(task_state),
                self._worker_to_object_name.get(worker, ""),
            )

    def __handle_new_worker(self, worker: str, now: datetime.datetime):
        """Register a worker seen for the first time, padding its history with an idle bar up to `now`."""
        if worker not in self._completed_data_cache:
            self.__setup_worker_cache(worker)
            self.__add_bar(worker, format_timediff(self._start_time, now), TaskColors.NO_WORK, "")
        self._seen_workers.add(worker)

    def __remove_task_from_worker(self, worker: str, task_id: bytes, now: datetime.datetime, force_new_time: bool):
        """Remove `task_id` from the running set of `worker`.

        When `force_new_time` is true the start of the worker's current bar is reset to `now`.
        Unknown workers and unknown task ids are ignored instead of raising.
        """
        current = self._current_tasks.get(worker)
        if current is None:
            # the worker was already dropped (e.g. detected as lost); nothing to update
            return

        _, task_list, prev_start_time = current
        task_list.discard(task_id)

        self._current_tasks[worker] = (len(task_list) != 0, task_list, now if force_new_time else prev_start_time)

    def __handle_running_task(self, state: StateTask, worker: str, now: datetime.datetime):
        """Record that `worker` is now running the task described by `state`."""
        # if another worker was previously assigned this task, remove it
        previous_worker = self._task_id_to_worker.get(state.task_id)
        if previous_worker and previous_worker != worker:
            self.__remove_task_from_worker(worker=previous_worker, task_id=state.task_id, now=now, force_new_time=False)

        self._task_id_to_worker[state.task_id] = worker
        self._worker_to_object_name[worker] = state.function_name.decode()

        doing_job, task_list, start_time = self._current_tasks.get(worker, (False, set(), None))
        if doing_job:
            # the worker already has a running bar; just track the extra task
            with self._data_update_lock:
                self._current_tasks[worker][1].add(state.task_id)
            return

        with self._data_update_lock:
            self._current_tasks[worker] = (True, {state.task_id}, now)
            if start_time:
                # fill the gap since the previous bar ended with an idle bar
                self.__add_bar(worker, format_timediff(start_time, now), TaskColors.NO_WORK, "")

    def handle_task_state(self, state_task: StateTask):
        """
        The scheduler sends out `state.worker` while a Task is running.
        However, as soon as the task is done, that entry is cleared.
        A Success status will thus come with an empty `state.worker`, so
        we store this mapping ourselves based on the Running statuses we see.
        """

        task_state = state_task.state
        now = datetime.datetime.now()
        self._last_task_tick = now

        # NOTE(review): only Success and Canceling are treated as results here. Other final
        # states (e.g. Failed) fall through and never close the worker's running bar.
        # Confirm whether that is intended.
        if task_state in {TaskState.Success, TaskState.Canceling}:
            self.__handle_task_result(state_task, now)
            return

        if not (worker := state_task.worker):
            return

        worker_string = worker.decode()
        self._worker_last_update[worker_string] = now

        if worker_string not in self._seen_workers:
            self.__handle_new_worker(worker_string, now)

        if task_state in {TaskState.Running}:
            self.__handle_running_task(state_task, worker_string, now)

    def __add_lost_worker(self, worker: str, now: datetime.datetime):
        """Queue `worker` for later removal from the history, stamped with `now`."""
        self._lost_workers_queue.put((now, worker))

    def __detect_lost_workers(self, now: datetime.datetime):
        """Stop tracking workers whose last update is older than the memory store time."""
        removed_workers = []
        for worker in self._current_tasks.keys():
            last_tick = self._worker_last_update[worker]
            if now - last_tick > self._settings.memory_store_time:
                self.__add_lost_worker(worker, now)
                removed_workers.append(worker)

        # pop after the loop so the dict is not mutated while it is being iterated
        for worker in removed_workers:
            self._current_tasks.pop(worker)

    def __remove_worker_from_history(self, worker: str):
        """Forget the history trace of `worker` and allow it to be registered again."""
        if worker in self._completed_data_cache:
            self._completed_data_cache.pop(worker)
        # discard, not remove: a worker can be reported dead without ever having appeared in
        # the stream, or be queued for removal more than once
        self._seen_workers.discard(worker)

    def __remove_old_tasks_from_history(self, remove_up_to: datetime.datetime):
        """Blank out, for every worker, the bars that ended at or before `remove_up_to`.

        Bar widths are seconds stacked from `_start_time`, so the cutoff is converted to an
        offset on that axis and compared with each bar's cumulative end position.
        """
        cutoff_seconds = format_timediff(self._start_time, remove_up_to)
        if cutoff_seconds <= 0:
            # the cutoff lies before the start of the time axis: nothing can be expired yet
            return

        for worker, worker_data in self._completed_data_cache.items():
            elapsed = 0.0
            expired_count = 0
            for duration in worker_data["x"]:
                if elapsed + duration > cutoff_seconds:
                    break
                elapsed += duration
                expired_count += 1

            if expired_count > 0:
                self.__remove_old_tasks_from_cache(worker, expired_count)

    def __remove_old_workers(self, remove_up_to: datetime.datetime):
        """Remove from the history the lost workers that were queued at or before `remove_up_to`."""
        while not self._lost_workers_queue.empty():
            timestamp, worker = self._lost_workers_queue.get()
            if timestamp > remove_up_to:
                # not expired yet; SimpleQueue has no peek, so the entry is put back (at the tail)
                self._lost_workers_queue.put((timestamp, worker))
                return
            self.__remove_worker_from_history(worker)

    def __remove_dead_workers(self, remove_up_to: datetime.datetime):
        """Remove from the history the workers marked dead before `remove_up_to`."""
        while self._dead_workers and self._dead_workers[0][0] < remove_up_to:
            _, worker = self._dead_workers.popleft()
            self.__remove_worker_from_history(worker)

    def __split_workers_by_status(self, now: datetime.datetime) -> List[Tuple[str, float, str]]:
        """Return (display name, seconds running, function name) for every worker with running tasks."""
        workers_doing_jobs = []
        for worker, (doing_job, task_list, start_time) in self._current_tasks.items():
            if doing_job:
                worker_name = format_worker_name(worker)
                duration = format_timediff(start_time, now)
                object_name = self._worker_to_object_name.get(worker, "")

                workers_doing_jobs.append((worker_name, duration, object_name))
        return workers_doing_jobs

    def mark_dead_worker(self, worker_name: str):
        """Record that `worker_name` died now; its history is removed after the retention time."""
        now = datetime.datetime.now()
        with self._data_update_lock:
            self._dead_workers.append((now, worker_name))

    def update_data(self, workers_section: WorkersSection):
        """Refresh the last-update time of every listed worker and recompute the busy set."""
        now = datetime.datetime.now()
        worker_names = sorted(workers_section.workers.keys())
        itls = {w: workers_section.workers[w].itl for w in worker_names}
        # a worker counts as busy when its `itl` has exactly three items and items 1 and 2 are "1"
        # NOTE(review): the meaning of the `itl` fields is not visible here; confirm against WorkersSection
        busy_workers = {w for w in worker_names if len(itls[w]) == 3 and itls[w][1] == "1" and itls[w][2] == "1"}
        for worker in worker_names:
            self._worker_last_update[worker] = now

        with self._data_update_lock:
            self._busy_workers = busy_workers
            self._busy_workers_update_time = now

    def clear_stale_busy_workers(self, now: datetime.datetime):
        """Empty the busy set when it has not been refreshed for more than two seconds."""
        if now - self._busy_workers_update_time > datetime.timedelta(seconds=2):
            self._busy_workers = set()

    def update_plot(self):
        """Prune expired workers and bars, rebuild the figure data and redraw the plot."""
        with self._data_update_lock:
            now = datetime.datetime.now()

            self.clear_stale_busy_workers(now)

            task_update_time = self._last_task_tick
            workers_doing_tasks = self.__split_workers_by_status(now)

            self.__detect_lost_workers(now)
            worker_history_time = now - self._settings.memory_store_time
            self.__remove_old_workers(worker_history_time)
            self.__remove_old_tasks_from_history(worker_history_time)

            worker_retention_time = now - self._settings.worker_retention_time
            self.__remove_dead_workers(worker_retention_time)

            completed_cache_values = list(self._completed_data_cache.values())

            if now - task_update_time >= datetime.timedelta(seconds=30):
                # get rid of the in-progress plots in ['data']
                self._figure["data"] = completed_cache_values
                self.__render_plot(now)
                return

            working_data = {
                "type": "bar",
                "name": "Working",
                "y": [w for (w, _, _) in workers_doing_tasks],
                "x": [t for (_, t, _) in workers_doing_tasks],
                "orientation": "h",
                "text": [f for (_, _, f) in workers_doing_tasks],
                "hovertemplate": "%{text} (%{x})",
                "marker": {"color": TaskColors.RUNNING, "width": 5},
                "showlegend": False,
            }
            plot_data = completed_cache_values + [working_data]
            self._figure["data"] = plot_data
            self.__render_plot(now)

    def __render_plot(self, now: datetime.datetime):
        """Move the x axis so it shows the current stream window, then redraw the plot."""
        lower_bound, upper_bound = get_bounds(now, self._start_time, self._settings)

        ticks = make_ticks(lower_bound, upper_bound)
        tick_text = make_tick_text(int(self._settings.stream_window.total_seconds()))

        self._figure["layout"]["xaxis"]["range"] = [lower_bound, upper_bound]
        self._figure["layout"]["xaxis"]["tickvals"] = ticks
        self._figure["layout"]["xaxis"]["ticktext"] = tick_text
        self._plot.update()
scaler/ui/task_log.py ADDED
@@ -0,0 +1,83 @@
1
+ import dataclasses
2
+ from collections import deque
3
+ from threading import Lock
4
+ from typing import Deque
5
+
6
+ from nicegui import ui
7
+
8
+ from scaler.protocol.python.common import TaskState
9
+ from scaler.protocol.python.message import StateTask
10
+ from scaler.utility.formatter import format_bytes
11
+ from scaler.utility.metadata.profile_result import ProfileResult
12
+
13
+ # TaskState values that mark a task as completed (other states, e.g. Running, are still in progress)
14
+ COMPLETED_TASK_STATUSES = {
15
+ TaskState.Success,
16
+ TaskState.Failed,
17
+ TaskState.Canceled,
18
+ TaskState.FailedWorkerDied,
19
+ TaskState.CanceledNotFound,
20
+ }
21
+
22
+
23
@dataclasses.dataclass
class TaskData:
    """One row of the task log: display strings describing a single task."""

    task: str = ""
    function: str = ""
    duration: str = ""
    peak_mem: str = ""
    status: str = ""

    def populate(self, state: StateTask):
        """Fill the row from `state`.

        Duration and peak memory come from the profiling metadata and stay "N/A" when
        `state.metadata` is empty.
        """
        self.task = state.task_id.hex()
        self.function = state.function_name.decode()
        self.status = state.state.name

        self.duration = "N/A"
        self.peak_mem = "N/A"
        if state.metadata:
            profiling_data = ProfileResult.deserialize(state.metadata)
            duration = profiling_data.duration_s
            mem = profiling_data.memory_peak
            self.duration = f"{duration:.2f}s"
            self.peak_mem = format_bytes(mem) if mem != 0 else "0"

    def draw_row(self):
        """Draw the five cells of this row; the status is green on success and red otherwise."""
        # `status` holds a TaskState member name (set in populate), so compare against the
        # enum's name; the former literal "Finished" never matched and every row was red.
        color = "color: green" if self.status == TaskState.Success.name else "color: red"
        ui.label(self.task)
        ui.label(self.function)
        ui.label(self.duration)
        ui.label(self.peak_mem)
        ui.label(self.status).style(color)

    @staticmethod
    def draw_titles():
        """Draw the five column headers, in the same order as the cells of `draw_row`."""
        ui.label("Task ID")
        ui.label("Function")
        ui.label("Duration")
        ui.label("Peak mem")
        ui.label("Status")
60
+
61
+
62
class TaskLogTable:
    """Table of the most recently completed tasks, newest first, capped at 100 rows."""

    def __init__(self):
        # bounded deque: appending on the left silently drops the oldest row on the right
        self._task_log: Deque[TaskData] = deque(maxlen=100)
        self._lock = Lock()

    def handle_task_state(self, state: StateTask):
        """Add a row for `state` when its task state is one of COMPLETED_TASK_STATUSES."""
        # Test the TaskState carried by the message, not the message itself: the message is
        # never a member of the set, so no row was ever logged.
        if state.state not in COMPLETED_TASK_STATUSES:
            return

        row = TaskData()
        row.populate(state)

        with self._lock:
            self._task_log.appendleft(row)

    @ui.refreshable
    def draw_section(self):
        """Draw the header and one grid row per logged task while holding the lock."""
        with self._lock:
            with ui.card().classes("w-full q-mx-auto"), ui.grid(columns=5).classes("q-mx-auto"):
                TaskData.draw_titles()
                for task in self._task_log:
                    task.draw_row()
scaler/ui/utility.py ADDED
@@ -0,0 +1,35 @@
1
+ import datetime
2
+ from typing import List, Tuple
3
+
4
+ from scaler.ui.setting_page import Settings
5
+
6
+
7
def format_timediff(a: datetime.datetime, b: datetime.datetime) -> float:
    """Return the time from `a` to `b` in seconds (negative when `b` is earlier than `a`)."""
    elapsed = b - a
    return elapsed.total_seconds()
9
+
10
+
11
def format_worker_name(worker_name: str) -> str:
    """Shorten a worker identifier to "<host_name>|<pid>" for display.

    The identifier is expected to hold four "|"-separated fields, of which the first is used
    as the pid and the third as the host name. Any other shape is returned unchanged, so an
    unexpected worker name cannot break a UI refresh.
    """
    fields = worker_name.split("|")
    if len(fields) != 4:
        return worker_name

    pid, _, host_name, _ = fields
    return f"{host_name}|{pid}"
14
+
15
+
16
def get_bounds(now: datetime.datetime, start_time: datetime.datetime, settings: "Settings") -> Tuple[int, int]:
    """Return the (lower, upper) x-axis bounds, in whole seconds since `start_time`.

    The upper bound is the time elapsed up to `now` and the lower bound lies one stream window
    before it. While less than one window has elapsed the bounds are (0, window length).
    """
    # total_seconds() rather than .seconds: .seconds drops whole days, so the bounds would
    # wrap around once more than 24 hours have elapsed.
    window_seconds = int(settings.stream_window.total_seconds())
    elapsed_seconds = int((now - start_time).total_seconds())

    if elapsed_seconds <= window_seconds:
        return 0, window_seconds

    return elapsed_seconds - window_seconds, elapsed_seconds
24
+
25
+
26
def make_ticks(lower_bound: int, upper_bound: int) -> List[int]:
    """Return tick positions from `lower_bound` to `upper_bound`, spaced a sixth of the span apart.

    A span shorter than 6 would give a step of 0, which `range` rejects; a step of 1 is used
    instead.
    """
    distance = (upper_bound - lower_bound) // 6
    if distance == 0:
        distance = 1
    return list(range(lower_bound, upper_bound + 1, distance))
29
+
30
+
31
def make_tick_text(window_length: int) -> List[int]:
    """Return tick labels from `-window_length` up to 0, spaced a sixth of the window apart.

    A window shorter than 6 would give a step of 0, which `range` rejects; a step of 1 is used
    instead.
    """
    upper = 0
    lower = -1 * window_length
    distance = (upper - lower) // 6
    if distance == 0:
        distance = 1
    return list(range(lower, upper + 1, distance))
scaler/ui/webui.py ADDED
@@ -0,0 +1,125 @@
1
+ import dataclasses
2
+ import threading
3
+ from functools import partial
4
+
5
+ from nicegui import ui
6
+
7
+ from scaler.io.sync_subscriber import ZMQSyncSubscriber
8
+ from scaler.protocol.python.message import StateScheduler, StateTask
9
+ from scaler.protocol.python.mixins import Message
10
+ from scaler.ui.constants import (
11
+ MEMORY_USAGE_UPDATE_INTERVAL,
12
+ TASK_LOG_REFRESH_INTERVAL,
13
+ TASK_STREAM_UPDATE_INTERVAL,
14
+ WORKER_PROCESSORS_REFRESH_INTERVAL,
15
+ )
16
+ from scaler.ui.live_display import SchedulerSection, WorkersSection
17
+ from scaler.ui.memory_window import MemoryChart
18
+ from scaler.ui.setting_page import Settings
19
+ from scaler.ui.task_graph import TaskStream
20
+ from scaler.ui.task_log import TaskLogTable
21
+ from scaler.ui.worker_processors import WorkerProcessors
22
+ from scaler.utility.formatter import format_bytes, format_percentage
23
+ from scaler.config.types.zmq import ZMQConfig
24
+
25
+
26
@dataclasses.dataclass
class Sections:
    """Bundle of the UI section objects that the web UI builds and updates together."""

    # Sections drawn on the "Live" tab.
    scheduler_section: SchedulerSection
    workers_section: WorkersSection

    # Charts set up on the "Worker Task Stream" tab.
    task_stream_section: TaskStream
    memory_usage_section: MemoryChart

    # Table drawn on the "Task Log" tab.
    tasklog_section: TaskLogTable

    # Section drawn on the "Worker Processors" tab.
    worker_processors: WorkerProcessors

    # Section drawn on the "Settings" tab; also passed to the chart setup calls.
    settings_section: Settings
35
+
36
+
37
def start_webui(address: str, host: str, port: int):
    """Build the dashboard, subscribe to state messages and serve the UI until it stops.

    Args:
        address: address string parsed with ``ZMQConfig.from_string`` for the subscriber.
        host: forwarded to ``ui.run``.
        port: forwarded to ``ui.run``.
    """
    sections = Sections(
        scheduler_section=SchedulerSection(),
        workers_section=WorkersSection(),
        task_stream_section=TaskStream(),
        memory_usage_section=MemoryChart(),
        tasklog_section=TaskLogTable(),
        worker_processors=WorkerProcessors(),
        settings_section=Settings(),
    )
    task_log = sections.tasklog_section
    task_stream = sections.task_stream_section
    memory_chart = sections.memory_usage_section
    processors = sections.worker_processors
    settings = sections.settings_section

    # Tab headers; the tab objects are reused below to attach the matching panels.
    with ui.tabs().classes("w-full h-full") as tab_bar:
        live_tab = ui.tab("Live")
        tasklog_tab = ui.tab("Task Log")
        stream_tab = ui.tab("Worker Task Stream")
        worker_processors_tab = ui.tab("Worker Processors")
        settings_tab = ui.tab("Settings")

    with ui.tab_panels(tab_bar, value=live_tab).classes("w-full"):
        with ui.tab_panel(live_tab):
            sections.scheduler_section.draw_section()
            sections.workers_section.draw_section()  # type: ignore[call-arg]

        with ui.tab_panel(tasklog_tab):
            task_log.draw_section()  # type: ignore[call-arg]
            ui.timer(TASK_LOG_REFRESH_INTERVAL, task_log.draw_section.refresh, active=True)

        with ui.tab_panel(stream_tab):
            task_stream.setup_task_stream(settings)
            ui.timer(TASK_STREAM_UPDATE_INTERVAL, task_stream.update_plot, active=True)

            memory_chart.setup_memory_chart(settings)
            ui.timer(MEMORY_USAGE_UPDATE_INTERVAL, memory_chart.update_plot, active=True)

        with ui.tab_panel(worker_processors_tab):
            processors.draw_section()  # type: ignore[call-arg]
            ui.timer(WORKER_PROCESSORS_REFRESH_INTERVAL, processors.draw_section.refresh, active=True)

        with ui.tab_panel(settings_tab):
            settings.draw_section()

    # Every received message is dispatched to __show_status together with the sections.
    state_subscriber = ZMQSyncSubscriber(
        address=ZMQConfig.from_string(address),
        callback=partial(__show_status, tables=sections),
        topic=b"",
        timeout_seconds=-1,
    )
    state_subscriber.start()

    # Run the UI server in a non-daemon thread and block until that thread finishes.
    serve_ui = partial(ui.run, host=host, port=port, reload=False)
    ui_thread = threading.Thread(target=serve_ui, daemon=False)
    ui_thread.start()
    ui_thread.join()
89
+
90
+
91
def __show_status(status: Message, tables: Sections):
    """Dispatch one received message to the sections that consume it.

    ``StateScheduler`` messages update the scheduler and worker views;
    ``StateTask`` messages are handed to the task stream, the memory chart and
    the task log, in that order. Any other message type is ignored.
    """
    if isinstance(status, StateScheduler):
        __update_scheduler_state(status, tables)
    elif isinstance(status, StateTask):
        task_consumers = (tables.task_stream_section, tables.memory_usage_section, tables.tasklog_section)
        for consumer in task_consumers:
            consumer.handle_task_state(status)
101
+
102
+
103
def __update_scheduler_state(data: StateScheduler, tables: Sections):
    """Apply a scheduler state message to the scheduler, worker, task-stream and processor sections.

    Workers that were known before but are missing from ``data`` are removed
    from the workers section and from the worker processors view, and are
    marked dead on the task stream. The workers section is redrawn only when
    the set of worker names changed.
    """
    scheduler_view = tables.scheduler_section
    scheduler_view.cpu = format_percentage(data.scheduler.cpu)
    scheduler_view.rss = format_bytes(data.scheduler.rss)
    scheduler_view.rss_free = format_bytes(data.rss_free)

    workers_view = tables.workers_section

    # Snapshot the known names before populating, so newly reported workers count as a change.
    known_names = set(workers_view.workers.keys())
    reported_names = {reported.worker_id.decode() for reported in data.worker_manager.workers}

    for reported in data.worker_manager.workers:
        workers_view.workers[reported.worker_id.decode()].populate(reported)

    for died_worker in known_names - reported_names:
        workers_view.workers.pop(died_worker)
        tables.worker_processors.remove_worker(died_worker)
        tables.task_stream_section.mark_dead_worker(died_worker)

    if known_names != reported_names:
        workers_view.draw_section.refresh()

    tables.task_stream_section.update_data(workers_view)

    tables.worker_processors.update_data(data.worker_manager.workers)