opengris-scaler 1.12.7__cp312-cp312-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of opengris-scaler might be problematic. Click here for more details.

Files changed (234) hide show
  1. opengris_scaler-1.12.7.dist-info/METADATA +729 -0
  2. opengris_scaler-1.12.7.dist-info/RECORD +234 -0
  3. opengris_scaler-1.12.7.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.7.dist-info/entry_points.txt +9 -0
  5. opengris_scaler-1.12.7.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.7.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.7.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-61c06778.1.0.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-21b63b70.1.0.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/CMakeLists.txt +11 -0
  13. scaler/__init__.py +14 -0
  14. scaler/about.py +5 -0
  15. scaler/client/__init__.py +0 -0
  16. scaler/client/agent/__init__.py +0 -0
  17. scaler/client/agent/client_agent.py +210 -0
  18. scaler/client/agent/disconnect_manager.py +27 -0
  19. scaler/client/agent/future_manager.py +112 -0
  20. scaler/client/agent/heartbeat_manager.py +74 -0
  21. scaler/client/agent/mixins.py +89 -0
  22. scaler/client/agent/object_manager.py +98 -0
  23. scaler/client/agent/task_manager.py +64 -0
  24. scaler/client/client.py +635 -0
  25. scaler/client/future.py +252 -0
  26. scaler/client/object_buffer.py +129 -0
  27. scaler/client/object_reference.py +25 -0
  28. scaler/client/serializer/__init__.py +0 -0
  29. scaler/client/serializer/default.py +16 -0
  30. scaler/client/serializer/mixins.py +38 -0
  31. scaler/cluster/__init__.py +0 -0
  32. scaler/cluster/cluster.py +115 -0
  33. scaler/cluster/combo.py +148 -0
  34. scaler/cluster/object_storage_server.py +45 -0
  35. scaler/cluster/scheduler.py +83 -0
  36. scaler/config/__init__.py +0 -0
  37. scaler/config/defaults.py +87 -0
  38. scaler/config/loader.py +95 -0
  39. scaler/config/mixins.py +15 -0
  40. scaler/config/section/__init__.py +0 -0
  41. scaler/config/section/cluster.py +56 -0
  42. scaler/config/section/native_worker_adapter.py +44 -0
  43. scaler/config/section/object_storage_server.py +7 -0
  44. scaler/config/section/scheduler.py +53 -0
  45. scaler/config/section/symphony_worker_adapter.py +47 -0
  46. scaler/config/section/top.py +13 -0
  47. scaler/config/section/webui.py +16 -0
  48. scaler/config/types/__init__.py +0 -0
  49. scaler/config/types/object_storage_server.py +45 -0
  50. scaler/config/types/worker.py +57 -0
  51. scaler/config/types/zmq.py +79 -0
  52. scaler/entry_points/__init__.py +0 -0
  53. scaler/entry_points/cluster.py +133 -0
  54. scaler/entry_points/object_storage_server.py +41 -0
  55. scaler/entry_points/scheduler.py +135 -0
  56. scaler/entry_points/top.py +286 -0
  57. scaler/entry_points/webui.py +26 -0
  58. scaler/entry_points/worker_adapter_native.py +137 -0
  59. scaler/entry_points/worker_adapter_symphony.py +102 -0
  60. scaler/io/__init__.py +0 -0
  61. scaler/io/async_binder.py +85 -0
  62. scaler/io/async_connector.py +95 -0
  63. scaler/io/async_object_storage_connector.py +185 -0
  64. scaler/io/mixins.py +154 -0
  65. scaler/io/sync_connector.py +68 -0
  66. scaler/io/sync_object_storage_connector.py +185 -0
  67. scaler/io/sync_subscriber.py +83 -0
  68. scaler/io/utility.py +31 -0
  69. scaler/io/ymq/CMakeLists.txt +98 -0
  70. scaler/io/ymq/__init__.py +0 -0
  71. scaler/io/ymq/_ymq.pyi +96 -0
  72. scaler/io/ymq/_ymq.so +0 -0
  73. scaler/io/ymq/bytes.h +114 -0
  74. scaler/io/ymq/common.h +29 -0
  75. scaler/io/ymq/configuration.h +60 -0
  76. scaler/io/ymq/epoll_context.cpp +185 -0
  77. scaler/io/ymq/epoll_context.h +85 -0
  78. scaler/io/ymq/error.h +132 -0
  79. scaler/io/ymq/event_loop.h +55 -0
  80. scaler/io/ymq/event_loop_thread.cpp +64 -0
  81. scaler/io/ymq/event_loop_thread.h +46 -0
  82. scaler/io/ymq/event_manager.h +81 -0
  83. scaler/io/ymq/file_descriptor.h +203 -0
  84. scaler/io/ymq/interruptive_concurrent_queue.h +169 -0
  85. scaler/io/ymq/io_context.cpp +98 -0
  86. scaler/io/ymq/io_context.h +44 -0
  87. scaler/io/ymq/io_socket.cpp +299 -0
  88. scaler/io/ymq/io_socket.h +121 -0
  89. scaler/io/ymq/iocp_context.cpp +102 -0
  90. scaler/io/ymq/iocp_context.h +83 -0
  91. scaler/io/ymq/logging.h +163 -0
  92. scaler/io/ymq/message.h +15 -0
  93. scaler/io/ymq/message_connection.h +16 -0
  94. scaler/io/ymq/message_connection_tcp.cpp +672 -0
  95. scaler/io/ymq/message_connection_tcp.h +96 -0
  96. scaler/io/ymq/network_utils.h +179 -0
  97. scaler/io/ymq/pymod_ymq/bytes.h +113 -0
  98. scaler/io/ymq/pymod_ymq/exception.h +124 -0
  99. scaler/io/ymq/pymod_ymq/gil.h +15 -0
  100. scaler/io/ymq/pymod_ymq/io_context.h +166 -0
  101. scaler/io/ymq/pymod_ymq/io_socket.h +285 -0
  102. scaler/io/ymq/pymod_ymq/message.h +99 -0
  103. scaler/io/ymq/pymod_ymq/python.h +153 -0
  104. scaler/io/ymq/pymod_ymq/ymq.cpp +23 -0
  105. scaler/io/ymq/pymod_ymq/ymq.h +357 -0
  106. scaler/io/ymq/readme.md +114 -0
  107. scaler/io/ymq/simple_interface.cpp +80 -0
  108. scaler/io/ymq/simple_interface.h +24 -0
  109. scaler/io/ymq/tcp_client.cpp +367 -0
  110. scaler/io/ymq/tcp_client.h +75 -0
  111. scaler/io/ymq/tcp_operations.h +41 -0
  112. scaler/io/ymq/tcp_server.cpp +410 -0
  113. scaler/io/ymq/tcp_server.h +79 -0
  114. scaler/io/ymq/third_party/concurrentqueue.h +3747 -0
  115. scaler/io/ymq/timed_queue.h +272 -0
  116. scaler/io/ymq/timestamp.h +102 -0
  117. scaler/io/ymq/typedefs.h +20 -0
  118. scaler/io/ymq/utils.h +34 -0
  119. scaler/io/ymq/ymq.py +130 -0
  120. scaler/object_storage/CMakeLists.txt +50 -0
  121. scaler/object_storage/__init__.py +0 -0
  122. scaler/object_storage/constants.h +11 -0
  123. scaler/object_storage/defs.h +14 -0
  124. scaler/object_storage/io_helper.cpp +44 -0
  125. scaler/object_storage/io_helper.h +9 -0
  126. scaler/object_storage/message.cpp +56 -0
  127. scaler/object_storage/message.h +130 -0
  128. scaler/object_storage/object_manager.cpp +126 -0
  129. scaler/object_storage/object_manager.h +52 -0
  130. scaler/object_storage/object_storage_server.cpp +359 -0
  131. scaler/object_storage/object_storage_server.h +126 -0
  132. scaler/object_storage/object_storage_server.so +0 -0
  133. scaler/object_storage/pymod_object_storage_server.cpp +104 -0
  134. scaler/protocol/__init__.py +0 -0
  135. scaler/protocol/capnp/__init__.py +0 -0
  136. scaler/protocol/capnp/_python.py +6 -0
  137. scaler/protocol/capnp/common.capnp +63 -0
  138. scaler/protocol/capnp/message.capnp +216 -0
  139. scaler/protocol/capnp/object_storage.capnp +52 -0
  140. scaler/protocol/capnp/status.capnp +73 -0
  141. scaler/protocol/introduction.md +105 -0
  142. scaler/protocol/python/__init__.py +0 -0
  143. scaler/protocol/python/common.py +135 -0
  144. scaler/protocol/python/message.py +726 -0
  145. scaler/protocol/python/mixins.py +13 -0
  146. scaler/protocol/python/object_storage.py +118 -0
  147. scaler/protocol/python/status.py +279 -0
  148. scaler/protocol/worker.md +228 -0
  149. scaler/scheduler/__init__.py +0 -0
  150. scaler/scheduler/allocate_policy/__init__.py +0 -0
  151. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  152. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  153. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  154. scaler/scheduler/allocate_policy/mixins.py +55 -0
  155. scaler/scheduler/controllers/__init__.py +0 -0
  156. scaler/scheduler/controllers/balance_controller.py +65 -0
  157. scaler/scheduler/controllers/client_controller.py +131 -0
  158. scaler/scheduler/controllers/config_controller.py +31 -0
  159. scaler/scheduler/controllers/graph_controller.py +424 -0
  160. scaler/scheduler/controllers/information_controller.py +81 -0
  161. scaler/scheduler/controllers/mixins.py +201 -0
  162. scaler/scheduler/controllers/object_controller.py +147 -0
  163. scaler/scheduler/controllers/scaling_controller.py +86 -0
  164. scaler/scheduler/controllers/task_controller.py +373 -0
  165. scaler/scheduler/controllers/worker_controller.py +168 -0
  166. scaler/scheduler/object_usage/__init__.py +0 -0
  167. scaler/scheduler/object_usage/object_tracker.py +131 -0
  168. scaler/scheduler/scheduler.py +253 -0
  169. scaler/scheduler/task/__init__.py +0 -0
  170. scaler/scheduler/task/task_state_machine.py +92 -0
  171. scaler/scheduler/task/task_state_manager.py +61 -0
  172. scaler/ui/__init__.py +0 -0
  173. scaler/ui/constants.py +9 -0
  174. scaler/ui/live_display.py +118 -0
  175. scaler/ui/memory_window.py +146 -0
  176. scaler/ui/setting_page.py +47 -0
  177. scaler/ui/task_graph.py +370 -0
  178. scaler/ui/task_log.py +83 -0
  179. scaler/ui/utility.py +35 -0
  180. scaler/ui/webui.py +125 -0
  181. scaler/ui/worker_processors.py +85 -0
  182. scaler/utility/__init__.py +0 -0
  183. scaler/utility/debug.py +19 -0
  184. scaler/utility/event_list.py +63 -0
  185. scaler/utility/event_loop.py +58 -0
  186. scaler/utility/exceptions.py +42 -0
  187. scaler/utility/formatter.py +44 -0
  188. scaler/utility/graph/__init__.py +0 -0
  189. scaler/utility/graph/optimization.py +27 -0
  190. scaler/utility/graph/topological_sorter.py +11 -0
  191. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  192. scaler/utility/identifiers.py +105 -0
  193. scaler/utility/logging/__init__.py +0 -0
  194. scaler/utility/logging/decorators.py +25 -0
  195. scaler/utility/logging/scoped_logger.py +33 -0
  196. scaler/utility/logging/utility.py +183 -0
  197. scaler/utility/many_to_many_dict.py +123 -0
  198. scaler/utility/metadata/__init__.py +0 -0
  199. scaler/utility/metadata/profile_result.py +31 -0
  200. scaler/utility/metadata/task_flags.py +30 -0
  201. scaler/utility/mixins.py +13 -0
  202. scaler/utility/network_util.py +7 -0
  203. scaler/utility/one_to_many_dict.py +72 -0
  204. scaler/utility/queues/__init__.py +0 -0
  205. scaler/utility/queues/async_indexed_queue.py +37 -0
  206. scaler/utility/queues/async_priority_queue.py +70 -0
  207. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  208. scaler/utility/queues/indexed_queue.py +114 -0
  209. scaler/utility/serialization.py +9 -0
  210. scaler/version.txt +1 -0
  211. scaler/worker/__init__.py +0 -0
  212. scaler/worker/agent/__init__.py +0 -0
  213. scaler/worker/agent/heartbeat_manager.py +107 -0
  214. scaler/worker/agent/mixins.py +137 -0
  215. scaler/worker/agent/processor/__init__.py +0 -0
  216. scaler/worker/agent/processor/object_cache.py +107 -0
  217. scaler/worker/agent/processor/processor.py +279 -0
  218. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  219. scaler/worker/agent/processor_holder.py +145 -0
  220. scaler/worker/agent/processor_manager.py +365 -0
  221. scaler/worker/agent/profiling_manager.py +109 -0
  222. scaler/worker/agent/task_manager.py +150 -0
  223. scaler/worker/agent/timeout_manager.py +19 -0
  224. scaler/worker/preload.py +84 -0
  225. scaler/worker/worker.py +264 -0
  226. scaler/worker_adapter/__init__.py +0 -0
  227. scaler/worker_adapter/native.py +154 -0
  228. scaler/worker_adapter/symphony/__init__.py +0 -0
  229. scaler/worker_adapter/symphony/callback.py +45 -0
  230. scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
  231. scaler/worker_adapter/symphony/message.py +24 -0
  232. scaler/worker_adapter/symphony/task_manager.py +288 -0
  233. scaler/worker_adapter/symphony/worker.py +205 -0
  234. scaler/worker_adapter/symphony/worker_adapter.py +142 -0
@@ -0,0 +1,253 @@
1
+ import asyncio
2
+ import functools
3
+ import logging
4
+
5
+ import zmq.asyncio
6
+
7
+ from scaler.io.async_binder import ZMQAsyncBinder
8
+ from scaler.io.async_connector import ZMQAsyncConnector
9
+ from scaler.io.async_object_storage_connector import PyAsyncObjectStorageConnector
10
+ from scaler.config.defaults import CLEANUP_INTERVAL_SECONDS, STATUS_REPORT_INTERVAL_SECONDS
11
+ from scaler.io.mixins import AsyncBinder, AsyncConnector, AsyncObjectStorageConnector
12
+ from scaler.protocol.python.common import ObjectStorageAddress
13
+ from scaler.protocol.python.message import (
14
+ ClientDisconnect,
15
+ ClientHeartbeat,
16
+ DisconnectRequest,
17
+ GraphTask,
18
+ InformationRequest,
19
+ ObjectInstruction,
20
+ Task,
21
+ TaskCancel,
22
+ TaskCancelConfirm,
23
+ TaskLog,
24
+ TaskResult,
25
+ WorkerHeartbeat,
26
+ )
27
+ from scaler.protocol.python.mixins import Message
28
+ from scaler.config.types.zmq import ZMQConfig, ZMQType
29
+ from scaler.config.section.scheduler import SchedulerConfig
30
+ from scaler.scheduler.controllers.balance_controller import VanillaBalanceController
31
+ from scaler.scheduler.controllers.client_controller import VanillaClientController
32
+ from scaler.scheduler.controllers.config_controller import VanillaConfigController
33
+ from scaler.scheduler.controllers.graph_controller import VanillaGraphTaskController
34
+ from scaler.scheduler.controllers.information_controller import VanillaInformationController
35
+ from scaler.scheduler.controllers.object_controller import VanillaObjectController
36
+ from scaler.scheduler.controllers.scaling_controller import NullScalingController, VanillaScalingController
37
+ from scaler.scheduler.controllers.task_controller import VanillaTaskController
38
+ from scaler.scheduler.controllers.worker_controller import VanillaWorkerController
39
+ from scaler.utility.event_loop import create_async_loop_routine
40
+ from scaler.utility.exceptions import ClientShutdownException
41
+ from scaler.utility.identifiers import ClientID, WorkerID
42
+
43
+
44
class Scheduler:
    """Owns the scheduler sockets and controllers and routes incoming messages to them.

    The constructor resolves the object storage and monitor addresses, creates the binder, the object
    storage connector and the monitor publisher, instantiates every controller and registers them with
    each other. ``get_loops`` connects to the object storage server and then runs all routines.
    """

    def __init__(self, config: SchedulerConfig):
        """Build the scheduler from ``config``.

        Raises:
            TypeError: if ``config.scheduler_address`` is not a TCP address.
        """
        self._config_controller = VanillaConfigController(config)

        if config.scheduler_address.type != ZMQType.tcp:
            # Two adjacent f-strings instead of a backslash continuation, so the indentation of the
            # source line does not leak into the error message.
            raise TypeError(
                f"{self.__class__.__name__}: scheduler address must be tcp type: "
                f"{config.scheduler_address.to_address()}"
            )

        # When no object storage address is configured, fall back to the scheduler host at port + 1.
        if config.object_storage_address is None:
            object_storage_address = ObjectStorageAddress.new_msg(
                host=config.scheduler_address.host, port=config.scheduler_address.port + 1
            )
        else:
            object_storage_address = ObjectStorageAddress.new_msg(
                host=config.object_storage_address.host, port=config.object_storage_address.port
            )
        self._config_controller.update_config("object_storage_address", object_storage_address)

        # When no monitor address is configured, fall back to the scheduler host at port + 2.
        if config.monitor_address is None:
            monitor_address = ZMQConfig(
                type=ZMQType.tcp, host=config.scheduler_address.host, port=config.scheduler_address.port + 2
            )
        else:
            monitor_address = config.monitor_address
        self._config_controller.update_config("monitor_address", monitor_address)

        self._context = zmq.asyncio.Context(io_threads=config.io_threads)

        self._binder: AsyncBinder = ZMQAsyncBinder(
            context=self._context, name="scheduler", address=config.scheduler_address
        )
        logging.info(f"{self.__class__.__name__}: listen to scheduler address {config.scheduler_address}")

        # The connector is only created here; the actual connection happens in connect_to_storage().
        self._connector_storage: AsyncObjectStorageConnector = PyAsyncObjectStorageConnector()
        logging.info(f"{self.__class__.__name__}: connect to object storage server {object_storage_address!r}")

        # PUB socket bound on the monitor address.
        self._binder_monitor: AsyncConnector = ZMQAsyncConnector(
            context=self._context,
            name="scheduler_monitor",
            socket_type=zmq.PUB,
            address=monitor_address,
            bind_or_connect="bind",
            callback=None,
            identity=None,
        )
        logging.info(f"{self.__class__.__name__}: listen to scheduler monitor address {monitor_address.to_address()}")

        self._task_allocate_policy = config.allocate_policy.value()

        self._client_manager = VanillaClientController(config_controller=self._config_controller)
        self._object_controller = VanillaObjectController(config_controller=self._config_controller)
        self._graph_controller = VanillaGraphTaskController(config_controller=self._config_controller)
        self._task_controller = VanillaTaskController(config_controller=self._config_controller)
        self._worker_controller = VanillaWorkerController(
            config_controller=self._config_controller, task_allocate_policy=self._task_allocate_policy
        )
        self._balance_controller = VanillaBalanceController(
            config_controller=self._config_controller, task_allocate_policy=self._task_allocate_policy
        )
        self._information_controller = VanillaInformationController(config_controller=self._config_controller)
        # Scaling is only active when an adapter webhook URL is configured.
        self._scaling_controller = (
            VanillaScalingController(config.adapter_webhook_url)
            if config.adapter_webhook_url
            else NullScalingController()
        )

        # register
        self._binder.register(self.on_receive_message)
        self._client_manager.register(
            self._binder, self._binder_monitor, self._object_controller, self._task_controller, self._worker_controller
        )
        self._object_controller.register(
            self._binder, self._binder_monitor, self._connector_storage, self._client_manager, self._worker_controller
        )
        self._graph_controller.register(
            self._binder,
            self._binder_monitor,
            self._connector_storage,
            self._client_manager,
            self._task_controller,
            self._object_controller,
        )
        self._task_controller.register(
            self._binder,
            self._binder_monitor,
            self._client_manager,
            self._object_controller,
            self._worker_controller,
            self._graph_controller,
        )
        self._worker_controller.register(self._binder, self._binder_monitor, self._task_controller)
        self._balance_controller.register(self._binder, self._binder_monitor, self._task_controller)

        self._information_controller.register_managers(
            self._binder_monitor,
            self._binder,
            self._client_manager,
            self._object_controller,
            self._task_controller,
            self._worker_controller,
            self._scaling_controller,
        )

    async def connect_to_storage(self):
        """Connect the storage connector to the object storage address stored in the config controller."""
        storage_address = self._config_controller.get_config("object_storage_address")
        await self._connector_storage.connect(storage_address.host, storage_address.port)

    async def on_receive_message(self, source: bytes, message: Message):
        """Dispatch one incoming message to the controller responsible for its type.

        Args:
            source: identity of the sender, wrapped as ``ClientID`` or ``WorkerID`` where a controller
                expects one.
            message: the decoded message; unrecognized types are logged as errors.
        """
        # =====================================================================================
        # client manager
        if isinstance(message, ClientHeartbeat):
            await self._client_manager.on_heartbeat(ClientID(source), message)
            return

        # scheduler receives client shutdown request from upstream
        if isinstance(message, ClientDisconnect):
            await self._client_manager.on_client_disconnect(ClientID(source), message)
            return

        # =====================================================================================
        # graph manager
        if isinstance(message, GraphTask):
            await self._graph_controller.on_graph_task(ClientID(source), message)
            return

        # =====================================================================================
        # task manager
        if isinstance(message, Task):
            await self._task_controller.on_task_new(message)
            return

        if isinstance(message, TaskCancel):
            # Cancels of graph sub-tasks go through the graph controller, everything else to the task controller.
            if self._graph_controller.is_graph_subtask(message.task_id):
                await self._graph_controller.on_graph_task_cancel(message)
            else:
                await self._task_controller.on_task_cancel(ClientID(source), message)
            return

        if isinstance(message, TaskCancelConfirm):
            await self._task_controller.on_task_cancel_confirm(message)
            return

        if isinstance(message, TaskResult):
            await self._task_controller.on_task_result(message)
            return

        if isinstance(message, TaskLog):
            # Forward the log to the client that owns the task; drop it when no client is known.
            client = self._client_manager.get_client_id(message.task_id)
            if client is not None:
                await self._binder.send(client, message)
            return

        # =====================================================================================
        # worker manager
        if isinstance(message, WorkerHeartbeat):
            await self._worker_controller.on_heartbeat(WorkerID(source), message)
            return

        # scheduler receives worker disconnect request from downstream
        if isinstance(message, DisconnectRequest):
            await self._worker_controller.on_disconnect(WorkerID(source), message)
            return

        # =====================================================================================
        # object manager
        if isinstance(message, ObjectInstruction):
            await self._object_controller.on_object_instruction(source, message)
            return

        # =====================================================================================
        # information manager
        if isinstance(message, InformationRequest):
            await self._information_controller.on_request(message)
            # Without this return a handled request would also be reported as an unknown message below.
            return

        logging.error(f"{self.__class__.__name__}: unknown message from {source=}: {message}")

    async def get_loops(self):
        """Connect to object storage, run every routine until they stop, then destroy the sockets.

        ``asyncio.CancelledError`` and ``ClientShutdownException`` end the run quietly (the latter is
        logged at info level); any other exception propagates to the caller.
        """
        await self.connect_to_storage()

        # The second argument is presumably the interval in seconds between runs - verify against
        # create_async_loop_routine.
        loops = [
            create_async_loop_routine(self._binder.routine, 0),
            create_async_loop_routine(self._connector_storage.routine, 0),
            create_async_loop_routine(self._graph_controller.routine, 0),
            create_async_loop_routine(
                self._balance_controller.routine, self._config_controller.get_config("load_balance_seconds")
            ),
            create_async_loop_routine(self._client_manager.routine, CLEANUP_INTERVAL_SECONDS),
            create_async_loop_routine(self._object_controller.routine, CLEANUP_INTERVAL_SECONDS),
            create_async_loop_routine(self._worker_controller.routine, CLEANUP_INTERVAL_SECONDS),
            create_async_loop_routine(self._information_controller.routine, STATUS_REPORT_INTERVAL_SECONDS),
        ]

        try:
            await asyncio.gather(*loops)
        except asyncio.CancelledError:
            pass
        except ClientShutdownException as e:
            logging.info(f"{self.__class__.__name__}: {e}")

        self._binder.destroy()
        self._binder_monitor.destroy()
248
+
249
+
250
@functools.wraps(Scheduler)
async def scheduler_main(*args, **kwargs):
    # Entry coroutine: construct a Scheduler from the forwarded arguments and run its loops to completion.
    # functools.wraps copies the Scheduler class metadata (name, doc, ...) onto this coroutine function.
    await Scheduler(*args, **kwargs).get_loops()
File without changes
@@ -0,0 +1,92 @@
1
+ from typing import Dict, Optional
2
+
3
+ from scaler.protocol.python.common import TaskState, TaskTransition
4
+
5
+
6
class TaskStateMachine:
    """Tracks the state of one task and applies transitions allowed by ``TRANSITION_MAP``.

    A new machine starts in ``TaskState.Inactive`` with no previous state. States that have no entry
    in ``TRANSITION_MAP`` accept no further transitions.
    """

    # see https://github.com/finos/opengris-scaler/issues/56
    TRANSITION_MAP: Dict[TaskState, Dict[TaskTransition, TaskState]] = {
        TaskState.Inactive: {
            TaskTransition.HasCapacity: TaskState.Running,
            TaskTransition.TaskCancel: TaskState.Canceled,
        },
        TaskState.Canceling: {
            TaskTransition.TaskCancelConfirmCanceled: TaskState.Canceled,
            TaskTransition.WorkerDisconnect: TaskState.Canceled,
            TaskTransition.TaskCancelConfirmFailed: TaskState.Running,
            TaskTransition.TaskCancelConfirmNotFound: TaskState.CanceledNotFound,
        },
        TaskState.Running: {
            TaskTransition.TaskResultSuccess: TaskState.Success,
            TaskTransition.TaskResultFailed: TaskState.Failed,
            TaskTransition.TaskResultWorkerDied: TaskState.FailedWorkerDied,
            TaskTransition.TaskCancel: TaskState.Canceling,
            TaskTransition.BalanceTaskCancel: TaskState.BalanceCanceling,
            TaskTransition.WorkerDisconnect: TaskState.WorkerDisconnecting,
        },
        TaskState.BalanceCanceling: {
            TaskTransition.TaskResultSuccess: TaskState.Success,
            TaskTransition.TaskResultFailed: TaskState.Failed,
            TaskTransition.TaskResultWorkerDied: TaskState.FailedWorkerDied,
            TaskTransition.TaskCancel: TaskState.Canceling,
            TaskTransition.TaskCancelConfirmCanceled: TaskState.Inactive,
            TaskTransition.TaskCancelConfirmFailed: TaskState.Running,
            TaskTransition.WorkerDisconnect: TaskState.WorkerDisconnecting,
        },
        TaskState.WorkerDisconnecting: {
            TaskTransition.SchedulerHasTask: TaskState.Inactive,
            TaskTransition.SchedulerHasNoTask: TaskState.FailedWorkerDied,
        },
    }

    def __init__(self, debug):
        """Start in ``Inactive``; when ``debug`` is truthy every applied transition is recorded."""
        self._debug = debug
        self._paths = []

        self._previous_state = None
        self._state = TaskState.Inactive

    def __repr__(self):
        return f"TaskStateMachine(previous_state={self._previous_state}, state={self._state})"

    def get_path(self):
        """Render the recorded (state, transition) history followed by the current state."""
        hops = [f"[{state.name}] -{transition.name}->" for state, transition in self._paths]
        return " ".join(hops) + f" [{self._state.name}]"

    def previous_state(self) -> Optional[TaskState]:
        """State held before the last applied transition, or ``None`` if none was applied yet."""
        return self._previous_state

    def current_state(self) -> TaskState:
        return self._state

    def is_running(self) -> bool:
        return self._state == TaskState.Running

    def is_canceling(self) -> bool:
        return self._state == TaskState.Canceling

    def is_finished(self) -> bool:
        """True once the task ended with a result: success, failure, or worker death."""
        return self._state in {TaskState.Success, TaskState.Failed, TaskState.FailedWorkerDied}

    def is_canceled(self) -> bool:
        return self._state in {TaskState.Canceled, TaskState.CanceledNotFound}

    def is_done(self) -> bool:
        """True when the task is either finished or canceled."""
        return self.is_finished() or self.is_canceled()

    def on_transition(self, transition: TaskTransition) -> bool:
        """Apply ``transition`` if the current state allows it.

        Returns:
            True when the state changed; False when the current state has no outgoing transitions
            or ``transition`` is not among them (the machine is left untouched in that case).
        """
        options = TaskStateMachine.TRANSITION_MAP.get(self._state)
        if options is None or transition not in options:
            return False

        if self._debug:
            self._paths.append((self._state, transition))

        self._previous_state, self._state = self._state, options[transition]
        return True
@@ -0,0 +1,61 @@
1
+ import logging
2
+ from typing import Dict, Optional
3
+
4
+ from scaler.protocol.python.common import TaskState, TaskTransition
5
+ from scaler.scheduler.task.task_state_machine import TaskStateMachine
6
+ from scaler.utility.identifiers import TaskID
7
+
8
+
9
class TaskStateManager:
    """Keeps one ``TaskStateMachine`` per task id plus a per-state counter of tasks."""

    def __init__(self, debug: bool):
        self._debug = debug
        self._task_id_to_state_machine: Dict[TaskID, TaskStateMachine] = {}
        # One counter per TaskState member, all starting at zero.
        self._statistics: Dict[TaskState, int] = dict.fromkeys(TaskState, 0)

    def add_state_machine(self, task_id: TaskID) -> TaskStateMachine:
        """Create, register and return a new state machine for ``task_id``.

        The task id must not be registered yet (checked with an assertion). The counter of the new
        machine's initial state is incremented.
        """
        assert task_id not in self._task_id_to_state_machine

        machine = TaskStateMachine(self._debug)
        self._task_id_to_state_machine[task_id] = machine
        self._statistics[machine.current_state()] += 1
        return machine

    def remove_state_machine(self, task_id: TaskID):
        """Forget the state machine of ``task_id``; raises ``KeyError`` if it is not registered.

        The statistics counters are not adjusted here.
        """
        del self._task_id_to_state_machine[task_id]

    def get_state_machine(self, task_id: TaskID) -> Optional[TaskStateMachine]:
        """Return the state machine of ``task_id``, or ``None`` if it is not registered."""
        return self._task_id_to_state_machine.get(task_id)

    def on_transition(self, task_id: TaskID, transition: TaskTransition) -> Optional[TaskStateMachine]:
        """Apply ``transition`` to the state machine of ``task_id`` and update the statistics.

        This is the central place where task state machines are advanced. The task's state machine is
        returned when the transition was applied. ``None`` is returned, and an error is logged, when the
        task id is unknown or the transition is not valid for the current state.
        """
        machine = self._task_id_to_state_machine.get(task_id)
        if machine is None:
            logging.error(f"{task_id!r}: unknown {transition=} for non-existed state machine")
            return None

        if not machine.on_transition(transition):
            logging.error(f"{task_id!r}: cannot apply {transition} to current state {machine.current_state()}")
            return None

        # Move one count from the state that was left to the state that was entered.
        self._statistics[machine.previous_state()] -= 1
        self._statistics[machine.current_state()] += 1
        return machine

    def get_statistics(self) -> Dict[TaskState, int]:
        """Return the live per-state counter dictionary (not a copy)."""
        return self._statistics

    def get_debug_paths(self):
        """Return one line per registered task: its id and the path rendered by its state machine."""
        lines = (f"{task_id!r}: {machine.get_path()}" for task_id, machine in self._task_id_to_state_machine.items())
        return "\n".join(lines)
scaler/ui/__init__.py ADDED
File without changes
scaler/ui/constants.py ADDED
@@ -0,0 +1,9 @@
1
# times are in seconds

# Refresh interval for the task log view (presumably used by scaler/ui/task_log.py - verify).
TASK_LOG_REFRESH_INTERVAL = 0.5

# Refresh interval for the worker processors view (presumably scaler/ui/worker_processors.py - verify).
WORKER_PROCESSORS_REFRESH_INTERVAL = 2

# Update interval for the task stream (presumably the task graph chart - verify against callers).
TASK_STREAM_UPDATE_INTERVAL = 0.1

# Update interval for the memory usage chart (presumably scaler/ui/memory_window.py - verify).
MEMORY_USAGE_UPDATE_INTERVAL = 0.1
@@ -0,0 +1,118 @@
1
+ import dataclasses
2
+ from collections import defaultdict
3
+ from typing import Dict, List, Optional
4
+
5
+ from nicegui import ui
6
+ from nicegui.element import Element
7
+
8
+ from scaler.protocol.python.status import WorkerStatus
9
+ from scaler.ui.utility import format_worker_name
10
+ from scaler.utility.formatter import format_microseconds, format_seconds
11
+
12
+
13
@dataclasses.dataclass
class SchedulerSection:
    """UI card showing the scheduler's CPU, RSS and free RSS as bound text labels."""

    # Display strings; the labels created in draw_section() are bound to these attributes.
    cpu: str = dataclasses.field(default="")
    rss: str = dataclasses.field(default="")
    rss_free: str = dataclasses.field(default="")

    # Row element created by draw_section(); None until the section has been drawn.
    handler: Optional[Element] = dataclasses.field(default=None)

    def draw_section(self):
        """Create the card and its row of labels, remembering the row for later deletion."""
        with ui.card().classes("w-full"), ui.row() as handler:
            self.handler = handler
            ui.label("Scheduler")
            ui.label()
            ui.label("CPU:")
            ui.label().bind_text_from(self, "cpu")
            ui.label()
            ui.label("RSS:")
            ui.label().bind_text_from(self, "rss")
            ui.label()
            ui.label("RSS Free:")
            ui.label().bind_text_from(self, "rss_free")

    def delete_section(self):
        """Clear and delete the drawn row; a no-op when the section is not currently drawn."""
        if self.handler is None:
            # Nothing was drawn (or it was already deleted), so there is no element to remove.
            return

        self.handler.clear()
        self.handler.delete()
        # Drop the reference so a repeated call does not touch the deleted element again.
        self.handler = None
38
+
39
+
40
@dataclasses.dataclass
class WorkerRow:
    """One row of the workers grid: display values for a single worker plus its UI elements."""

    worker: str = dataclasses.field(default="")
    agt_cpu: float = dataclasses.field(default=0)
    agt_rss: int = dataclasses.field(default=0)
    cpu: float = dataclasses.field(default=0)
    rss: int = dataclasses.field(default=0)
    rss_free: int = dataclasses.field(default=0)
    free: int = dataclasses.field(default=0)
    sent: int = dataclasses.field(default=0)
    queued: int = dataclasses.field(default=0)
    suspended: int = dataclasses.field(default=0)
    lag: str = dataclasses.field(default="")
    itl: str = dataclasses.field(default="")
    last_seen: str = dataclasses.field(default="")

    # Elements removed by delete_row(). NOTE(review): nothing in this class appends to this list, so
    # delete_row() is a no-op unless a caller fills it - confirm the intended owner.
    handlers: List[Element] = dataclasses.field(default_factory=list)

    def populate(self, data: WorkerStatus):
        """Copy the values of a ``WorkerStatus`` into this row's display fields."""
        self.worker = data.worker_id.decode()
        # CPU readings are divided by 10 (presumably reported in tenths of a percent - TODO confirm).
        self.agt_cpu = data.agent.cpu / 10
        # RSS readings are divided by 1e6 and truncated to int for display.
        self.agt_rss = int(data.agent.rss / 1e6)
        self.cpu = sum(p.resource.cpu for p in data.processor_statuses) / 10
        self.rss = int(sum(p.resource.rss for p in data.processor_statuses) / 1e6)
        self.rss_free = int(data.rss_free / 1e6)
        self.free = data.free
        self.sent = data.sent
        self.queued = data.queued
        self.suspended = data.suspended
        self.lag = format_microseconds(data.lag_us)
        self.itl = data.itl
        self.last_seen = format_seconds(data.last_s)

    def draw_row(self):
        """Create the twelve UI elements of this row, bound to the row's fields."""
        # Upper bound of the RSS knobs, computed once from the values held at draw time.
        total_rss = self.rss + self.rss_free

        ui.label(format_worker_name(self.worker))
        ui.knob(track_color="grey-2", show_value=True, min=0, max=100).bind_value_from(self, "agt_cpu")
        ui.knob(track_color="grey-2", show_value=True, min=0, max=total_rss).bind_value_from(self, "agt_rss")
        ui.knob(track_color="grey-2", show_value=True, min=0, max=100).bind_value_from(self, "cpu")
        ui.knob(track_color="grey-2", show_value=True, min=0, max=total_rss).bind_value_from(self, "rss")
        ui.label().bind_text_from(self, "free")
        ui.label().bind_text_from(self, "sent")
        ui.label().bind_text_from(self, "queued")
        ui.label().bind_text_from(self, "suspended")
        ui.label().bind_text_from(self, "lag")
        # The field is named "itl"; binding to "ITL" referenced an attribute that does not exist.
        ui.label().bind_text_from(self, "itl")
        ui.label().bind_text_from(self, "last_seen")

    def delete_row(self):
        """Delete every element registered in ``handlers``."""
        for element in self.handlers:
            element.delete()
92
+
93
+
94
@dataclasses.dataclass
class WorkersSection:
    """Refreshable grid listing every known worker, one ``WorkerRow`` per worker."""

    # Rows by key; a missing key yields a fresh default WorkerRow because of the defaultdict.
    workers: Dict[str, WorkerRow] = dataclasses.field(default_factory=lambda: defaultdict(WorkerRow))

    @ui.refreshable
    def draw_section(self):
        """Draw the title row followed by one row per worker inside a 12-column grid."""
        with ui.row().classes("h-max"), ui.card().classes("w-full"), ui.grid(columns=12):
            self.__draw_titles()
            for row in self.workers.values():
                row.draw_row()

    @staticmethod
    def __draw_titles():
        """Emit the twelve column headers, in the same order WorkerRow.draw_row creates its elements."""
        titles = (
            "Worker",
            "Agt CPU %",
            "Agt RSS (in MB)",
            "Processors CPU %",
            "Processors RSS (in MB)",
            "Queue Capacity",
            "Tasks Sent",
            "Tasks Queued",
            "Tasks Suspended",
            "Lag",
            "ITL",
            "Last Seen",
        )
        for title in titles:
            ui.label(title)
@@ -0,0 +1,146 @@
1
+ import datetime
2
+ from typing import Any, Dict, Optional
3
+
4
+ from nicegui import ui
5
+
6
+ from scaler.protocol.python.message import StateTask
7
+ from scaler.ui.setting_page import Settings
8
+ from scaler.ui.utility import format_timediff, get_bounds, make_tick_text, make_ticks
9
+ from scaler.utility.formatter import format_bytes
10
+ from scaler.utility.metadata.profile_result import ProfileResult
11
+
12
# Title shown above the chart.
CHART_NAME = "Memory Usage"
X_AXIS_GRID_LINES = False

# Y-axis tick positions (0 and the first four powers of 1024) and the label shown at each one.
Y_AXIS_TICK_VALS = [0] + [1024**exponent for exponent in range(1, 5)]
Y_AXIS_TICK_TEXT = ["0", "1KB", "1MB", "1GB", "1TB"]
Y_AXIS_GRID_LINES = True
18
+
19
+
20
class MemoryChart:
    """Filled Plotly chart of memory usage over time, rebuilt from the profiling data of finished tasks."""

    def __init__(self):
        self._figure = {}
        self._plot = None
        self._plot_data: Dict[str, Any] = {}

        self._settings: Optional[Settings] = None

        # Reference instant for the x axis: 30 minutes before this object was created.
        thirty_minutes = datetime.timedelta(minutes=30)
        self._start_time = datetime.datetime.now() - thirty_minutes

    def setup_memory_chart(self, settings: Settings):
        """Create the chart card with its Plotly element and keep ``settings`` for later renders."""
        with ui.card().classes("w-full").style("height: 30vh"):
            # This very dict is both stored on the instance and placed in the figure, so points
            # added to its lists later are part of the figure without rebuilding it.
            trace = {
                "type": "scatter",
                "fill": "tozeroy",
                "fillcolor": "rgba(0,0,255,1)",
                "mode": "none",
                "name": "",
                "x": [],
                "y": [],
                "hovertemplate": [],
            }
            x_axis = {
                "autorange": False,
                "range": [0, 300],
                "showgrid": X_AXIS_GRID_LINES,
                "tickmode": "array",
                "tickvals": [0, 50, 100, 150, 200, 250, 300],
                "ticktext": [-300, -250, -200, -150, -100, -50, 0],
                "zeroline": False,
            }
            y_axis = {
                "tickvals": Y_AXIS_TICK_VALS,
                "ticktext": Y_AXIS_TICK_TEXT,
                "autorange": True,
                "automargin": True,
                "rangemode": "nonnegative",
                "showgrid": Y_AXIS_GRID_LINES,
                "type": "log",
            }
            layout = {
                "title": {"text": CHART_NAME},
                "autosize": True,
                "margin": {"l": 163},
                "xaxis": x_axis,
                "yaxis": y_axis,
            }

            self._plot_data = trace
            self._figure = {"data": [trace], "layout": layout}
            self._plot = ui.plotly(self._figure).classes("w-full h-full")
            self._settings = settings

    def handle_task_state(self, state: StateTask):
        """
        Record the memory use of a task from its profiling metadata.

        States with empty metadata, or whose profile reports a memory peak of 0, are ignored.
        """
        if state.metadata == b"":
            return

        profile = ProfileResult.deserialize(state.metadata)
        peak_memory = profile.memory_peak
        duration = profile.duration_s

        if peak_memory == 0:
            return

        self.__add_memory_usage(duration, peak_memory)

    def update_plot(self):
        """Re-render the chart for the current time."""
        self.__render_plot(datetime.datetime.now())

    def __add_memory_usage(self, time_taken: float, memory_usage: int):
        """Raise the plotted level by ``memory_usage`` from ``time_taken`` before now up to now."""
        xs = self._plot_data["x"]
        ys = self._plot_data["y"]
        hover_texts = self._plot_data["hovertemplate"]

        # Position of "now" on the x axis (presumably seconds since _start_time -- confirm in format_timediff).
        finished_at = format_timediff(self._start_time, datetime.datetime.now())
        started_at = finished_at - time_taken

        # Walk back from the end past every point that lies after the task's start.
        position = len(xs)
        while position > 0 and xs[position - 1] > started_at:
            position -= 1

        # Level to step up from: the value of the point currently at the insertion position, or 0 at the end.
        baseline = ys[position] if position < len(ys) else 0
        raised = baseline + memory_usage

        # Two points 0.01 apart draw the rise as a near-vertical edge.
        xs[position:position] = [started_at - 0.01, started_at]
        ys[position:position] = [baseline, raised]
        hover_texts[position:position] = [format_bytes(baseline), format_bytes(raised)]

        # Every point after the inserted pair overlaps this task, so it carries the extra memory too.
        for later in range(position + 2, len(xs)):
            ys[later] += memory_usage
            hover_texts[later] = format_bytes(ys[later])

        # Close the task at "now": hold its own usage until just before, then drop to zero.
        xs.extend([finished_at - 0.01, finished_at])
        ys.extend([memory_usage, 0])
        hover_texts.extend([format_bytes(memory_usage), format_bytes(0)])

    def __render_plot(self, now: datetime.datetime):
        """Refresh the x-axis window and ticks and the y-axis scale from the settings, then update the element."""
        lower_bound, upper_bound = get_bounds(now, self._start_time, self._settings)

        tick_positions = make_ticks(lower_bound, upper_bound)
        window_seconds = int(self._settings.stream_window.total_seconds())
        tick_labels = make_tick_text(window_seconds)

        layout = self._figure["layout"]
        x_axis = layout["xaxis"]
        x_axis["range"] = [lower_bound, upper_bound]
        x_axis["tickvals"] = tick_positions
        x_axis["ticktext"] = tick_labels
        layout["yaxis"]["type"] = self._settings.memory_usage_scale

        self._plot.update()