opengris_scaler-1.12.7-cp310-cp310-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of opengris-scaler might be problematic.
Files changed (234)
  1. opengris_scaler-1.12.7.dist-info/METADATA +729 -0
  2. opengris_scaler-1.12.7.dist-info/RECORD +234 -0
  3. opengris_scaler-1.12.7.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.7.dist-info/entry_points.txt +9 -0
  5. opengris_scaler-1.12.7.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.7.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.7.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-61c06778.1.0.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-21b63b70.1.0.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/CMakeLists.txt +11 -0
  13. scaler/__init__.py +14 -0
  14. scaler/about.py +5 -0
  15. scaler/client/__init__.py +0 -0
  16. scaler/client/agent/__init__.py +0 -0
  17. scaler/client/agent/client_agent.py +210 -0
  18. scaler/client/agent/disconnect_manager.py +27 -0
  19. scaler/client/agent/future_manager.py +112 -0
  20. scaler/client/agent/heartbeat_manager.py +74 -0
  21. scaler/client/agent/mixins.py +89 -0
  22. scaler/client/agent/object_manager.py +98 -0
  23. scaler/client/agent/task_manager.py +64 -0
  24. scaler/client/client.py +635 -0
  25. scaler/client/future.py +252 -0
  26. scaler/client/object_buffer.py +129 -0
  27. scaler/client/object_reference.py +25 -0
  28. scaler/client/serializer/__init__.py +0 -0
  29. scaler/client/serializer/default.py +16 -0
  30. scaler/client/serializer/mixins.py +38 -0
  31. scaler/cluster/__init__.py +0 -0
  32. scaler/cluster/cluster.py +115 -0
  33. scaler/cluster/combo.py +148 -0
  34. scaler/cluster/object_storage_server.py +45 -0
  35. scaler/cluster/scheduler.py +83 -0
  36. scaler/config/__init__.py +0 -0
  37. scaler/config/defaults.py +87 -0
  38. scaler/config/loader.py +95 -0
  39. scaler/config/mixins.py +15 -0
  40. scaler/config/section/__init__.py +0 -0
  41. scaler/config/section/cluster.py +56 -0
  42. scaler/config/section/native_worker_adapter.py +44 -0
  43. scaler/config/section/object_storage_server.py +7 -0
  44. scaler/config/section/scheduler.py +53 -0
  45. scaler/config/section/symphony_worker_adapter.py +47 -0
  46. scaler/config/section/top.py +13 -0
  47. scaler/config/section/webui.py +16 -0
  48. scaler/config/types/__init__.py +0 -0
  49. scaler/config/types/object_storage_server.py +45 -0
  50. scaler/config/types/worker.py +57 -0
  51. scaler/config/types/zmq.py +79 -0
  52. scaler/entry_points/__init__.py +0 -0
  53. scaler/entry_points/cluster.py +133 -0
  54. scaler/entry_points/object_storage_server.py +41 -0
  55. scaler/entry_points/scheduler.py +135 -0
  56. scaler/entry_points/top.py +286 -0
  57. scaler/entry_points/webui.py +26 -0
  58. scaler/entry_points/worker_adapter_native.py +137 -0
  59. scaler/entry_points/worker_adapter_symphony.py +102 -0
  60. scaler/io/__init__.py +0 -0
  61. scaler/io/async_binder.py +85 -0
  62. scaler/io/async_connector.py +95 -0
  63. scaler/io/async_object_storage_connector.py +185 -0
  64. scaler/io/mixins.py +154 -0
  65. scaler/io/sync_connector.py +68 -0
  66. scaler/io/sync_object_storage_connector.py +185 -0
  67. scaler/io/sync_subscriber.py +83 -0
  68. scaler/io/utility.py +31 -0
  69. scaler/io/ymq/CMakeLists.txt +98 -0
  70. scaler/io/ymq/__init__.py +0 -0
  71. scaler/io/ymq/_ymq.pyi +96 -0
  72. scaler/io/ymq/_ymq.so +0 -0
  73. scaler/io/ymq/bytes.h +114 -0
  74. scaler/io/ymq/common.h +29 -0
  75. scaler/io/ymq/configuration.h +60 -0
  76. scaler/io/ymq/epoll_context.cpp +185 -0
  77. scaler/io/ymq/epoll_context.h +85 -0
  78. scaler/io/ymq/error.h +132 -0
  79. scaler/io/ymq/event_loop.h +55 -0
  80. scaler/io/ymq/event_loop_thread.cpp +64 -0
  81. scaler/io/ymq/event_loop_thread.h +46 -0
  82. scaler/io/ymq/event_manager.h +81 -0
  83. scaler/io/ymq/file_descriptor.h +203 -0
  84. scaler/io/ymq/interruptive_concurrent_queue.h +169 -0
  85. scaler/io/ymq/io_context.cpp +98 -0
  86. scaler/io/ymq/io_context.h +44 -0
  87. scaler/io/ymq/io_socket.cpp +299 -0
  88. scaler/io/ymq/io_socket.h +121 -0
  89. scaler/io/ymq/iocp_context.cpp +102 -0
  90. scaler/io/ymq/iocp_context.h +83 -0
  91. scaler/io/ymq/logging.h +163 -0
  92. scaler/io/ymq/message.h +15 -0
  93. scaler/io/ymq/message_connection.h +16 -0
  94. scaler/io/ymq/message_connection_tcp.cpp +672 -0
  95. scaler/io/ymq/message_connection_tcp.h +96 -0
  96. scaler/io/ymq/network_utils.h +179 -0
  97. scaler/io/ymq/pymod_ymq/bytes.h +113 -0
  98. scaler/io/ymq/pymod_ymq/exception.h +124 -0
  99. scaler/io/ymq/pymod_ymq/gil.h +15 -0
  100. scaler/io/ymq/pymod_ymq/io_context.h +166 -0
  101. scaler/io/ymq/pymod_ymq/io_socket.h +285 -0
  102. scaler/io/ymq/pymod_ymq/message.h +99 -0
  103. scaler/io/ymq/pymod_ymq/python.h +153 -0
  104. scaler/io/ymq/pymod_ymq/ymq.cpp +23 -0
  105. scaler/io/ymq/pymod_ymq/ymq.h +357 -0
  106. scaler/io/ymq/readme.md +114 -0
  107. scaler/io/ymq/simple_interface.cpp +80 -0
  108. scaler/io/ymq/simple_interface.h +24 -0
  109. scaler/io/ymq/tcp_client.cpp +367 -0
  110. scaler/io/ymq/tcp_client.h +75 -0
  111. scaler/io/ymq/tcp_operations.h +41 -0
  112. scaler/io/ymq/tcp_server.cpp +410 -0
  113. scaler/io/ymq/tcp_server.h +79 -0
  114. scaler/io/ymq/third_party/concurrentqueue.h +3747 -0
  115. scaler/io/ymq/timed_queue.h +272 -0
  116. scaler/io/ymq/timestamp.h +102 -0
  117. scaler/io/ymq/typedefs.h +20 -0
  118. scaler/io/ymq/utils.h +34 -0
  119. scaler/io/ymq/ymq.py +130 -0
  120. scaler/object_storage/CMakeLists.txt +50 -0
  121. scaler/object_storage/__init__.py +0 -0
  122. scaler/object_storage/constants.h +11 -0
  123. scaler/object_storage/defs.h +14 -0
  124. scaler/object_storage/io_helper.cpp +44 -0
  125. scaler/object_storage/io_helper.h +9 -0
  126. scaler/object_storage/message.cpp +56 -0
  127. scaler/object_storage/message.h +130 -0
  128. scaler/object_storage/object_manager.cpp +126 -0
  129. scaler/object_storage/object_manager.h +52 -0
  130. scaler/object_storage/object_storage_server.cpp +359 -0
  131. scaler/object_storage/object_storage_server.h +126 -0
  132. scaler/object_storage/object_storage_server.so +0 -0
  133. scaler/object_storage/pymod_object_storage_server.cpp +104 -0
  134. scaler/protocol/__init__.py +0 -0
  135. scaler/protocol/capnp/__init__.py +0 -0
  136. scaler/protocol/capnp/_python.py +6 -0
  137. scaler/protocol/capnp/common.capnp +63 -0
  138. scaler/protocol/capnp/message.capnp +216 -0
  139. scaler/protocol/capnp/object_storage.capnp +52 -0
  140. scaler/protocol/capnp/status.capnp +73 -0
  141. scaler/protocol/introduction.md +105 -0
  142. scaler/protocol/python/__init__.py +0 -0
  143. scaler/protocol/python/common.py +135 -0
  144. scaler/protocol/python/message.py +726 -0
  145. scaler/protocol/python/mixins.py +13 -0
  146. scaler/protocol/python/object_storage.py +118 -0
  147. scaler/protocol/python/status.py +279 -0
  148. scaler/protocol/worker.md +228 -0
  149. scaler/scheduler/__init__.py +0 -0
  150. scaler/scheduler/allocate_policy/__init__.py +0 -0
  151. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  152. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  153. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  154. scaler/scheduler/allocate_policy/mixins.py +55 -0
  155. scaler/scheduler/controllers/__init__.py +0 -0
  156. scaler/scheduler/controllers/balance_controller.py +65 -0
  157. scaler/scheduler/controllers/client_controller.py +131 -0
  158. scaler/scheduler/controllers/config_controller.py +31 -0
  159. scaler/scheduler/controllers/graph_controller.py +424 -0
  160. scaler/scheduler/controllers/information_controller.py +81 -0
  161. scaler/scheduler/controllers/mixins.py +201 -0
  162. scaler/scheduler/controllers/object_controller.py +147 -0
  163. scaler/scheduler/controllers/scaling_controller.py +86 -0
  164. scaler/scheduler/controllers/task_controller.py +373 -0
  165. scaler/scheduler/controllers/worker_controller.py +168 -0
  166. scaler/scheduler/object_usage/__init__.py +0 -0
  167. scaler/scheduler/object_usage/object_tracker.py +131 -0
  168. scaler/scheduler/scheduler.py +253 -0
  169. scaler/scheduler/task/__init__.py +0 -0
  170. scaler/scheduler/task/task_state_machine.py +92 -0
  171. scaler/scheduler/task/task_state_manager.py +61 -0
  172. scaler/ui/__init__.py +0 -0
  173. scaler/ui/constants.py +9 -0
  174. scaler/ui/live_display.py +118 -0
  175. scaler/ui/memory_window.py +146 -0
  176. scaler/ui/setting_page.py +47 -0
  177. scaler/ui/task_graph.py +370 -0
  178. scaler/ui/task_log.py +83 -0
  179. scaler/ui/utility.py +35 -0
  180. scaler/ui/webui.py +125 -0
  181. scaler/ui/worker_processors.py +85 -0
  182. scaler/utility/__init__.py +0 -0
  183. scaler/utility/debug.py +19 -0
  184. scaler/utility/event_list.py +63 -0
  185. scaler/utility/event_loop.py +58 -0
  186. scaler/utility/exceptions.py +42 -0
  187. scaler/utility/formatter.py +44 -0
  188. scaler/utility/graph/__init__.py +0 -0
  189. scaler/utility/graph/optimization.py +27 -0
  190. scaler/utility/graph/topological_sorter.py +11 -0
  191. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  192. scaler/utility/identifiers.py +105 -0
  193. scaler/utility/logging/__init__.py +0 -0
  194. scaler/utility/logging/decorators.py +25 -0
  195. scaler/utility/logging/scoped_logger.py +33 -0
  196. scaler/utility/logging/utility.py +183 -0
  197. scaler/utility/many_to_many_dict.py +123 -0
  198. scaler/utility/metadata/__init__.py +0 -0
  199. scaler/utility/metadata/profile_result.py +31 -0
  200. scaler/utility/metadata/task_flags.py +30 -0
  201. scaler/utility/mixins.py +13 -0
  202. scaler/utility/network_util.py +7 -0
  203. scaler/utility/one_to_many_dict.py +72 -0
  204. scaler/utility/queues/__init__.py +0 -0
  205. scaler/utility/queues/async_indexed_queue.py +37 -0
  206. scaler/utility/queues/async_priority_queue.py +70 -0
  207. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  208. scaler/utility/queues/indexed_queue.py +114 -0
  209. scaler/utility/serialization.py +9 -0
  210. scaler/version.txt +1 -0
  211. scaler/worker/__init__.py +0 -0
  212. scaler/worker/agent/__init__.py +0 -0
  213. scaler/worker/agent/heartbeat_manager.py +107 -0
  214. scaler/worker/agent/mixins.py +137 -0
  215. scaler/worker/agent/processor/__init__.py +0 -0
  216. scaler/worker/agent/processor/object_cache.py +107 -0
  217. scaler/worker/agent/processor/processor.py +279 -0
  218. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  219. scaler/worker/agent/processor_holder.py +145 -0
  220. scaler/worker/agent/processor_manager.py +365 -0
  221. scaler/worker/agent/profiling_manager.py +109 -0
  222. scaler/worker/agent/task_manager.py +150 -0
  223. scaler/worker/agent/timeout_manager.py +19 -0
  224. scaler/worker/preload.py +84 -0
  225. scaler/worker/worker.py +264 -0
  226. scaler/worker_adapter/__init__.py +0 -0
  227. scaler/worker_adapter/native.py +154 -0
  228. scaler/worker_adapter/symphony/__init__.py +0 -0
  229. scaler/worker_adapter/symphony/callback.py +45 -0
  230. scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
  231. scaler/worker_adapter/symphony/message.py +24 -0
  232. scaler/worker_adapter/symphony/task_manager.py +288 -0
  233. scaler/worker_adapter/symphony/worker.py +205 -0
  234. scaler/worker_adapter/symphony/worker_adapter.py +142 -0
@@ -0,0 +1,635 @@
+import dataclasses
+import functools
+import logging
+import threading
+import uuid
+from collections import Counter
+from inspect import signature
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
+
+import zmq
+
+from scaler.client.agent.client_agent import ClientAgent
+from scaler.client.agent.future_manager import ClientFutureManager
+from scaler.client.future import ScalerFuture
+from scaler.client.object_buffer import ObjectBuffer
+from scaler.client.object_reference import ObjectReference
+from scaler.client.serializer.default import DefaultSerializer
+from scaler.client.serializer.mixins import Serializer
+from scaler.config.defaults import DEFAULT_CLIENT_TIMEOUT_SECONDS, DEFAULT_HEARTBEAT_INTERVAL_SECONDS
+from scaler.io.mixins import SyncConnector, SyncObjectStorageConnector
+from scaler.io.sync_connector import ZMQSyncConnector
+from scaler.io.sync_object_storage_connector import PySyncObjectStorageConnector
+from scaler.protocol.python.message import ClientDisconnect, ClientShutdownResponse, GraphTask, Task
+from scaler.utility.exceptions import ClientQuitException, MissingObjects
+from scaler.utility.graph.optimization import cull_graph
+from scaler.utility.graph.topological_sorter import TopologicalSorter
+from scaler.utility.identifiers import ClientID, ObjectID, TaskID
+from scaler.utility.metadata.profile_result import ProfileResult
+from scaler.utility.metadata.task_flags import TaskFlags, retrieve_task_flags_from_task
+from scaler.config.types.zmq import ZMQConfig, ZMQType
+from scaler.worker.agent.processor.processor import Processor
+
+
+@dataclasses.dataclass
+class _CallNode:
+    func: Callable
+    args: Tuple[str, ...]
+
+    def __post_init__(self):
+        if not callable(self.func):
+            raise TypeError(f"the first item of the tuple must be function, get {self.func}")
+
+        if not isinstance(self.args, tuple):
+            raise TypeError(f"arguments must be tuple, get {self.args}")
+
+        for arg in self.args:
+            if not isinstance(arg, str):
+                raise TypeError(f"argument `{arg}` must be a string and the string has to be in the graph")
+
+
+class Client:
+    def __init__(
+        self,
+        address: str,
+        profiling: bool = False,
+        timeout_seconds: int = DEFAULT_CLIENT_TIMEOUT_SECONDS,
+        heartbeat_interval_seconds: int = DEFAULT_HEARTBEAT_INTERVAL_SECONDS,
+        serializer: Serializer = DefaultSerializer(),
+        stream_output: bool = False,
+    ):
+        """
+        The Scaler Client used to send tasks to a scheduler.
+
+        :param address: Address of Scheduler to submit work to
+        :type address: str
+        :param profiling: If True, the returned futures will have the `task_duration()` property enabled.
+        :type profiling: bool
+        :param timeout_seconds: Seconds until heartbeat times out
+        :type timeout_seconds: int
+        :param heartbeat_interval_seconds: Frequency of heartbeat to scheduler in seconds
+        :type heartbeat_interval_seconds: int
+        :param stream_output: If True, stdout/stderr will be streamed to client during task execution
+        :type stream_output: bool
+        """
+        self.__initialize__(address, profiling, timeout_seconds, heartbeat_interval_seconds, serializer, stream_output)
+
+    def __initialize__(
+        self,
+        address: str,
+        profiling: bool,
+        timeout_seconds: int,
+        heartbeat_interval_seconds: int,
+        serializer: Serializer = DefaultSerializer(),
+        stream_output: bool = False,
+    ):
+        self._serializer = serializer
+
+        self._profiling = profiling
+        self._stream_output = stream_output
+        self._identity = ClientID.generate_client_id()
+
+        self._client_agent_address = ZMQConfig(ZMQType.inproc, host=f"scaler_client_{uuid.uuid4().hex}")
+        self._scheduler_address = ZMQConfig.from_string(address)
+        self._timeout_seconds = timeout_seconds
+        self._heartbeat_interval_seconds = heartbeat_interval_seconds
+
+        self._stop_event = threading.Event()
+        self._context = zmq.Context()
+        self._connector_agent: SyncConnector = ZMQSyncConnector(
+            context=self._context, socket_type=zmq.PAIR, address=self._client_agent_address, identity=self._identity
+        )
+
+        self._future_manager = ClientFutureManager(self._serializer)
+        self._agent = ClientAgent(
+            identity=self._identity,
+            client_agent_address=self._client_agent_address,
+            scheduler_address=ZMQConfig.from_string(address),
+            context=self._context,
+            future_manager=self._future_manager,
+            stop_event=self._stop_event,
+            timeout_seconds=self._timeout_seconds,
+            heartbeat_interval_seconds=self._heartbeat_interval_seconds,
+            serializer=self._serializer,
+        )
+        self._agent.start()
+
+        logging.info(f"ScalerClient: connect to scheduler at {self._scheduler_address}")
+
+        # Blocks until the agent receives the object storage address
+        self._storage_address = self._agent.get_storage_address()
+
+        logging.info(f"ScalerClient: connect to object storage at {self._storage_address}")
+        self._connector_storage: SyncObjectStorageConnector = PySyncObjectStorageConnector(
+            self._storage_address.host, self._storage_address.port
+        )
+
+        self._object_buffer = ObjectBuffer(
+            self._identity, self._serializer, self._connector_agent, self._connector_storage
+        )
+        self._future_factory = functools.partial(
+            ScalerFuture,
+            serializer=self._serializer,
+            connector_agent=self._connector_agent,
+            connector_storage=self._connector_storage,
+        )
+
+    @property
+    def identity(self) -> ClientID:
+        return self._identity
+
+    def __del__(self):
+        self.disconnect()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.disconnect()
+
+    def __getstate__(self) -> dict:
+        """
+        Serializes the client object's state.
+
+        Client serialization is useful when a client reference is used within a remote task:
+
+
+        .. code:: python
+
+            client = Client(...)
+
+            def fibonacci(client: Client, n: int):
+                if n == 0:
+                    return 0
+                elif n == 1:
+                    return 1
+                else:
+                    a = client.submit(fibonacci, n - 1)
+                    b = client.submit(fibonacci, n - 2)
+                    return a.result() + b.result()
+
+            print(client.submit(fibonacci, client, 7).result())
+
+
+        When serializing the client, only saves the address parameters. When deserialized, a new client object
+        connecting to the same scheduler and remote logger will be instantiated.
+        """
+
+        return {
+            "address": self._scheduler_address.to_address(),
+            "profiling": self._profiling,
+            "stream_output": self._stream_output,
+            "timeout_seconds": self._timeout_seconds,
+            "heartbeat_interval_seconds": self._heartbeat_interval_seconds,
+        }
+
+    def __setstate__(self, state: dict) -> None:
+        # TODO: fix copy the serializer
+        self.__initialize__(
+            address=state["address"],
+            profiling=state["profiling"],
+            stream_output=state["stream_output"],
+            timeout_seconds=state["timeout_seconds"],
+            heartbeat_interval_seconds=state["heartbeat_interval_seconds"],
+        )
+
+    def submit(self, fn: Callable, *args, **kwargs) -> ScalerFuture:
+        """
+        Submit a single task (function with arguments) to the scheduler, and return a future.
+
+        See `submit_verbose()` for additional parameters.
+
+        :param fn: function to be executed remotely
+        :type fn: Callable
+        :param args: positional arguments will be passed to function
+        :param kwargs: keyword arguments will be passed to function
+        :return: future of the submitted task
+        :rtype: ScalerFuture
+        """
+
+        return self.submit_verbose(fn, args, kwargs)
+
+    def submit_verbose(
+        self, fn: Callable, args: Tuple[Any, ...], kwargs: Dict[str, Any], capabilities: Optional[Dict[str, int]] = None
+    ) -> ScalerFuture:
+        """
+        Submit a single task (function with arguments) to the scheduler, and return a future. Possibly route the task to
+        specific workers.
+
+        :param fn: function to be executed remotely
+        :type fn: Callable
+        :param args: positional arguments will be passed to function
+        :param kwargs: keyword arguments will be passed to function
+        :param capabilities: capabilities used for routing the tasks, e.g. `{"gpu": 2, "memory": 1_000_000_000}`.
+        :type capabilities: Optional[Dict[str, int]]
+        :return: future of the submitted task
+        :rtype: ScalerFuture
+        """
+
+        self.__assert_client_not_stopped()
+
+        function_object_id = self._object_buffer.buffer_send_function(fn).object_id
+        all_args = Client.__convert_kwargs_to_args(fn, args, kwargs)
+
+        task, future = self.__submit(function_object_id, all_args, delayed=True, capabilities=capabilities)
+
+        self._object_buffer.commit_send_objects()
+        self._connector_agent.send(task)
+        return future
+
+    def map(
+        self, fn: Callable, iterable: Iterable[Tuple[Any, ...]], capabilities: Optional[Dict[str, int]] = None
+    ) -> List[Any]:
+        if not all(isinstance(args, (tuple, list)) for args in iterable):
+            raise TypeError("iterable should be list of arguments(list or tuple-like) of function")
+
+        self.__assert_client_not_stopped()
+
+        function_object_id = self._object_buffer.buffer_send_function(fn).object_id
+        tasks, futures = zip(
+            *[self.__submit(function_object_id, args, delayed=False, capabilities=capabilities) for args in iterable]
+        )
+
+        self._object_buffer.commit_send_objects()
+        for task in tasks:
+            self._connector_agent.send(task)
+
+        try:
+            results = [fut.result() for fut in futures]
+        except Exception as e:
+            logging.exception(f"error happened when do scaler client.map:\n{e}")
+            self.disconnect()
+            raise e
+
+        return results
+
+    def get(
+        self,
+        graph: Dict[str, Union[Any, Tuple[Union[Callable, str], ...]]],
+        keys: List[str],
+        block: bool = True,
+        capabilities: Optional[Dict[str, int]] = None,
+    ) -> Dict[str, Union[Any, ScalerFuture]]:
+        """
+        .. code-block:: python
+           :linenos:
+           graph = {
+               "a": 1,
+               "b": 2,
+               "c": (inc, "a"),
+               "d": (inc, "b"),
+               "e": (add, "c", "d")
+           }
+
+        :param graph: dictionary presentation of task graphs
+        :type graph: Dict[str, Union[Any, Tuple[Union[Callable, Any]]
+        :param keys: list of keys want to get results from computed graph
+        :type keys: List[str]
+        :param block: if True, it will directly return a dictionary that maps from keys to results
+        :return: dictionary of mapping keys to futures, or map to results if block=True is specified
+        :param capabilities: capabilities used for routing the tasks, e.g. `{"gpu": 2, "memory": 1_000_000_000}`.
+        :type capabilities: Optional[Dict[str, int]]
+        :rtype: Dict[ScalerFuture]
+        """
+
+        self.__assert_client_not_stopped()
+
+        capabilities = capabilities or {}
+
+        graph = cull_graph(graph, keys)
+
+        node_name_to_argument, call_graph = self.__split_data_and_graph(graph)
+        self.__check_graph(node_name_to_argument, call_graph, keys)
+
+        graph_task, compute_futures, finished_futures = self.__construct_graph(
+            node_name_to_argument, call_graph, keys, block, capabilities
+        )
+        self._object_buffer.commit_send_objects()
+        self._connector_agent.send(graph_task)
+
+        self._future_manager.add_future(
+            self._future_factory(
+                task=Task.new_msg(
+                    task_id=graph_task.task_id,
+                    source=self._identity,
+                    metadata=b"",
+                    func_object_id=None,
+                    function_args=[],
+                    capabilities=capabilities,
+                ),
+                is_delayed=not block,
+                group_task_id=graph_task.task_id,
+            )
+        )
+        for future in compute_futures.values():
+            self._future_manager.add_future(future)
+
+        # preserve the future insertion order based on inputted keys
+        futures = {}
+        for key in keys:
+            if key in compute_futures:
+                futures[key] = compute_futures[key]
+            else:
+                futures[key] = finished_futures[key]
+
+        if not block:
+            # just return futures
+            return futures
+
+        try:
+            results = {k: v.result() for k, v in futures.items()}
+        except Exception as e:
+            logging.exception(f"error happened when do scaler client.get:\n{e}")
+            self.disconnect()
+            raise e
+
+        return results
+
+    def send_object(self, obj: Any, name: Optional[str] = None) -> ObjectReference:
+        """
+        send object to scheduler, this can be used to cache very large data to scheduler, and reuse it in multiple
+        tasks
+
+        :param obj: object to send, it will be serialized and send to scheduler
+        :type obj: Any
+        :param name: give a name to the cached argument
+        :type name: Optional[str]
+        :return: object reference
+        :rtype ObjectReference
+        """
+
+        self.__assert_client_not_stopped()
+
+        cache = self._object_buffer.buffer_send_object(obj, name)
+        return ObjectReference(cache.object_name, len(cache.object_payload), cache.object_id)
+
+    def clear(self):
+        """
+        clear all resources used by the client, this will cancel all running futures and invalidate all existing object
+        references
+        """
+
+        # It's important to be ensure that all running futures are cancelled/finished before clearing object, or else we
+        # might end up with tasks indefinitely waiting on no longer existing objects.
+        self._future_manager.cancel_all_futures()
+
+        self._object_buffer.clear()
+
+    def disconnect(self):
+        """
+        disconnect from connected scheduler, this will not shut down the scheduler
+        """
+
+        if self._stop_event.is_set():
+            self.__destroy()
+            return
+
+        logging.info(f"ScalerClient: disconnect from {self._scheduler_address.to_address()}")
+
+        self._future_manager.cancel_all_futures()
+
+        self._connector_agent.send(ClientDisconnect.new_msg(ClientDisconnect.DisconnectType.Disconnect))
+
+        self.__destroy()
+
+    def __receive_shutdown_response(self):
+        message: Optional[ClientShutdownResponse] = None
+        while not isinstance(message, ClientShutdownResponse):
+            message = self._connector_agent.receive()
+
+        if not message.accepted:
+            raise ValueError("Scheduler is in protected mode. Can't shutdown")
+
+    def shutdown(self):
+        """
+        shutdown all workers that connected to the scheduler this client connects to, it will cancel all other
+        clients' ongoing tasks, please be aware shutdown might not success if scheduler is configured as protected mode,
+        then it cannot shut down scheduler and the workers
+        """
+
+        if not self._agent.is_alive():
+            self.__destroy()
+            return
+
+        logging.info(f"ScalerClient: request shutdown for {self._scheduler_address.to_address()}")
+
+        self._future_manager.cancel_all_futures()
+
+        self._connector_agent.send(ClientDisconnect.new_msg(ClientDisconnect.DisconnectType.Shutdown))
+        try:
+            self.__receive_shutdown_response()
+        finally:
+            self.__destroy()
+
+    def __submit(
+        self,
+        function_object_id: ObjectID,
+        args: Tuple[Any, ...],
+        delayed: bool,
+        capabilities: Optional[Dict[str, int]] = None,
+    ) -> Tuple[Task, ScalerFuture]:
+        task_id = TaskID.generate_task_id()
+
+        capabilities = capabilities or {}
+
+        function_args: List[Union[ObjectID, TaskID]] = []
+        for arg in args:
+            if isinstance(arg, ObjectReference):
+                if not self._object_buffer.is_valid_object_id(arg.object_id):
+                    raise MissingObjects(f"unknown object: {arg.object_id!r}.")
+
+                function_args.append(arg.object_id)
+            else:
+                function_args.append(self._object_buffer.buffer_send_object(arg).object_id)
+
+        task_flags_bytes = self.__get_task_flags().serialize()
+
+        task = Task.new_msg(
+            task_id=task_id,
+            source=self._identity,
+            metadata=task_flags_bytes,
+            func_object_id=function_object_id,
+            function_args=function_args,
+            capabilities=capabilities,
+        )
+
+        future = self._future_factory(task=task, is_delayed=delayed, group_task_id=None)
+        self._future_manager.add_future(future)
+        return task, future
+
+    @staticmethod
+    def __convert_kwargs_to_args(fn: Callable, args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> Tuple[Any, ...]:
+        all_params = [p for p in signature(fn).parameters.values()]
+
+        params = [p for p in all_params if p.kind in {p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD}]
+
+        if len(args) >= len(params):
+            return args
+
+        number_of_required = len([p for p in params if p.default is p.empty])
+
+        args_list = list(args)
+        kwargs = kwargs.copy()
+        kwargs.update({p.name: p.default for p in all_params if p.kind == p.KEYWORD_ONLY if p.default != p.empty})
+
+        for p in params[len(args_list) : number_of_required]:
+            try:
+                args_list.append(kwargs.pop(p.name))
+            except KeyError:
+                missing = tuple(p.name for p in params[len(args_list) : number_of_required])
+                raise TypeError(f"{fn} missing {len(missing)} arguments: {missing}")
+
+        for p in params[len(args_list) :]:
+            args_list.append(kwargs.pop(p.name, p.default))
+
+        return tuple(args_list)
+
+    def __split_data_and_graph(
+        self, graph: Dict[str, Union[Any, Tuple[Union[Callable, str], ...]]]
+    ) -> Tuple[Dict[str, Tuple[ObjectID, Any]], Dict[str, _CallNode]]:
+        call_graph = {}
+        node_name_to_argument: Dict[str, Tuple[ObjectID, Union[Any, Tuple[Union[Callable, Any], ...]]]] = dict()
+
+        for node_name, node in graph.items():
+            if isinstance(node, tuple) and len(node) > 0 and callable(node[0]):
+                call_graph[node_name] = _CallNode(func=node[0], args=node[1:])  # type: ignore[arg-type]
+                continue
+
+            if isinstance(node, ObjectReference):
+                object_id = node.object_id
+            else:
+                object_id = self._object_buffer.buffer_send_object(node, name=node_name).object_id
+
+            node_name_to_argument[node_name] = (object_id, node)
+
+        return node_name_to_argument, call_graph
+
+    @staticmethod
+    def __check_graph(
+        node_to_argument: Dict[str, Tuple[ObjectID, Any]], call_graph: Dict[str, _CallNode], keys: List[str]
+    ):
+        duplicate_keys = [key for key, count in dict(Counter(keys)).items() if count > 1]
+        if duplicate_keys:
+            raise KeyError(f"duplicate key detected in argument keys: {duplicate_keys}")
+
+        # sanity check graph
+        for key in keys:
+            if key not in call_graph and key not in node_to_argument:
+                raise KeyError(f"key {key} has to be in graph")
+
+        sorter: TopologicalSorter[str] = TopologicalSorter()
+        for node_name, node in call_graph.items():
+            for arg in node.args:
+                if arg not in node_to_argument and arg not in call_graph:
+                    raise KeyError(f"argument {arg} in node '{node_name}': {node} is not defined in graph")
+
+            sorter.add(node_name, *node.args)
+
+        # check cyclic dependencies
+        sorter.prepare()
+
+    def __construct_graph(
+        self,
+        node_name_to_arguments: Dict[str, Tuple[ObjectID, Any]],
+        call_graph: Dict[str, _CallNode],
+        keys: List[str],
+        block: bool,
+        capabilities: Dict[str, int],
+    ) -> Tuple[GraphTask, Dict[str, ScalerFuture], Dict[str, ScalerFuture]]:
+        graph_task_id = TaskID.generate_task_id()
+
+        node_name_to_task_id = {node_name: TaskID.generate_task_id() for node_name in call_graph.keys()}
+
+        task_flags_bytes = self.__get_task_flags().serialize()
+
+        task_id_to_tasks = dict()
+
+        for node_name, node in call_graph.items():
+            task_id = node_name_to_task_id[node_name]
+            function_cache = self._object_buffer.buffer_send_function(node.func)
+
+            arguments: List[Union[TaskID, ObjectID]] = []
+            for arg in node.args:
+                assert arg in call_graph or arg in node_name_to_arguments
+
+                if arg in call_graph:
+                    arguments.append(TaskID(node_name_to_task_id[arg]))
+                elif arg in node_name_to_arguments:
+                    argument, _ = node_name_to_arguments[arg]
+                    arguments.append(argument)
+                else:
+                    raise ValueError("Not possible")
+
+            task_id_to_tasks[task_id] = Task.new_msg(
+                task_id=task_id,
+                source=self._identity,
+                metadata=task_flags_bytes,
+                func_object_id=function_cache.object_id,
+                function_args=arguments,
+                capabilities=capabilities,
+            )
+
+        result_task_ids = [node_name_to_task_id[key] for key in keys if key in call_graph]
+        graph_task = GraphTask.new_msg(graph_task_id, self._identity, result_task_ids, list(task_id_to_tasks.values()))
+
+        compute_futures = {}
+        ready_futures = {}
+        for key in keys:
+            if key in call_graph:
+                compute_futures[key] = self._future_factory(
+                    task=task_id_to_tasks[node_name_to_task_id[key]], is_delayed=not block, group_task_id=graph_task_id
+                )
+
+            elif key in node_name_to_arguments:
+                argument, data = node_name_to_arguments[key]
+                future: ScalerFuture = self._future_factory(
+                    task=Task.new_msg(
+                        task_id=TaskID.generate_task_id(),
+                        source=self._identity,
+                        metadata=b"",
+                        func_object_id=None,
+                        function_args=[],
+                        capabilities={},
+                    ),
+                    is_delayed=False,
+                    group_task_id=graph_task_id,
+                )
+                future.set_result(data, ProfileResult())
+                ready_futures[key] = future
+
+            else:
+                raise ValueError(f"cannot find {key=} in graph")
+
+        return graph_task, compute_futures, ready_futures
+
+    def __get_task_flags(self) -> TaskFlags:
+        parent_task_priority = self.__get_parent_task_priority()
+
+        if parent_task_priority is not None:
+            task_priority = parent_task_priority + 1
+        else:
+            task_priority = 0
+
+        return TaskFlags(profiling=self._profiling, priority=task_priority, stream_output=self._stream_output)
+
+    def __assert_client_not_stopped(self):
+        if self._stop_event.is_set():
+            raise ClientQuitException("client is already stopped.")
+
+    def __destroy(self):
+        self._agent.join()
+        self._context.destroy(linger=1)
+
+    @staticmethod
+    def __get_parent_task_priority() -> Optional[int]:
+        """If the client is running inside a Scaler processor, returns the priority of the associated task."""
+
+        current_processor = Processor.get_current_processor()
+
+        if current_processor is None:
+            return None
+
+        current_task = current_processor.current_task()
+        assert current_task is not None
+
+        return retrieve_task_flags_from_task(current_task).priority
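
For orientation, below is a minimal usage sketch of the single-task API added by the client module in the hunk above. It only uses calls visible in this diff (Client, submit, map, send_object, and the context-manager protocol); the scheduler address and the add function are illustrative placeholders, not values documented by the package.

from scaler.client.client import Client


def add(a: int, b: int) -> int:
    return a + b


# "tcp://127.0.0.1:2345" is a placeholder scheduler address, not a documented default.
with Client(address="tcp://127.0.0.1:2345") as client:
    # submit() serializes the function and its arguments, sends a Task to the
    # scheduler, and returns a ScalerFuture.
    future = client.submit(add, 1, 2)
    print(future.result())  # 3

    # map() takes an iterable of argument tuples and blocks until all results arrive.
    print(client.map(add, [(1, 2), (3, 4)]))  # [3, 7]

    # send_object() caches a value with the scheduler/object storage so multiple
    # tasks can refer to it by ObjectReference instead of re-sending it each time.
    big = client.send_object(list(range(1_000_000)), name="big_list")
    print(client.submit(len, big).result())  # 1000000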
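
Similarly, a sketch of the task-graph API exposed by Client.get(), mirroring the graph shown in its docstring; inc, add, and the address are again placeholders for illustration.

from scaler.client.client import Client


def inc(x: int) -> int:
    return x + 1


def add(a: int, b: int) -> int:
    return a + b


# Graph nodes are either plain data or (callable, *dependency_keys) tuples,
# as described in the Client.get() docstring above.
graph = {
    "a": 1,
    "b": 2,
    "c": (inc, "a"),
    "d": (inc, "b"),
    "e": (add, "c", "d"),
}

with Client(address="tcp://127.0.0.1:2345") as client:  # placeholder address
    # block=True (the default) resolves the requested keys to concrete results;
    # block=False would return ScalerFuture objects instead.
    print(client.get(graph, keys=["e"]))  # {"e": 5}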