opengris-scaler 1.12.28 (cp313-cp313-musllinux_1_2_x86_64.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of opengris-scaler might be problematic; more details are available on the package's registry page.

Files changed (187)
  1. opengris_scaler-1.12.28.dist-info/METADATA +728 -0
  2. opengris_scaler-1.12.28.dist-info/RECORD +187 -0
  3. opengris_scaler-1.12.28.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.28.dist-info/entry_points.txt +10 -0
  5. opengris_scaler-1.12.28.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.28.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.28.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/__init__.py +14 -0
  13. scaler/about.py +5 -0
  14. scaler/client/__init__.py +0 -0
  15. scaler/client/agent/__init__.py +0 -0
  16. scaler/client/agent/client_agent.py +210 -0
  17. scaler/client/agent/disconnect_manager.py +27 -0
  18. scaler/client/agent/future_manager.py +112 -0
  19. scaler/client/agent/heartbeat_manager.py +74 -0
  20. scaler/client/agent/mixins.py +89 -0
  21. scaler/client/agent/object_manager.py +98 -0
  22. scaler/client/agent/task_manager.py +64 -0
  23. scaler/client/client.py +658 -0
  24. scaler/client/future.py +252 -0
  25. scaler/client/object_buffer.py +129 -0
  26. scaler/client/object_reference.py +25 -0
  27. scaler/client/serializer/__init__.py +0 -0
  28. scaler/client/serializer/default.py +16 -0
  29. scaler/client/serializer/mixins.py +38 -0
  30. scaler/cluster/__init__.py +0 -0
  31. scaler/cluster/cluster.py +115 -0
  32. scaler/cluster/combo.py +150 -0
  33. scaler/cluster/object_storage_server.py +45 -0
  34. scaler/cluster/scheduler.py +86 -0
  35. scaler/config/__init__.py +0 -0
  36. scaler/config/defaults.py +94 -0
  37. scaler/config/loader.py +96 -0
  38. scaler/config/mixins.py +20 -0
  39. scaler/config/section/__init__.py +0 -0
  40. scaler/config/section/cluster.py +55 -0
  41. scaler/config/section/ecs_worker_adapter.py +85 -0
  42. scaler/config/section/native_worker_adapter.py +43 -0
  43. scaler/config/section/object_storage_server.py +8 -0
  44. scaler/config/section/scheduler.py +54 -0
  45. scaler/config/section/symphony_worker_adapter.py +47 -0
  46. scaler/config/section/top.py +13 -0
  47. scaler/config/section/webui.py +21 -0
  48. scaler/config/types/__init__.py +0 -0
  49. scaler/config/types/network_backend.py +12 -0
  50. scaler/config/types/object_storage_server.py +45 -0
  51. scaler/config/types/worker.py +62 -0
  52. scaler/config/types/zmq.py +83 -0
  53. scaler/entry_points/__init__.py +0 -0
  54. scaler/entry_points/cluster.py +133 -0
  55. scaler/entry_points/object_storage_server.py +45 -0
  56. scaler/entry_points/scheduler.py +144 -0
  57. scaler/entry_points/top.py +286 -0
  58. scaler/entry_points/webui.py +48 -0
  59. scaler/entry_points/worker_adapter_ecs.py +191 -0
  60. scaler/entry_points/worker_adapter_native.py +137 -0
  61. scaler/entry_points/worker_adapter_symphony.py +98 -0
  62. scaler/io/__init__.py +0 -0
  63. scaler/io/async_binder.py +89 -0
  64. scaler/io/async_connector.py +95 -0
  65. scaler/io/async_object_storage_connector.py +225 -0
  66. scaler/io/mixins.py +154 -0
  67. scaler/io/sync_connector.py +68 -0
  68. scaler/io/sync_object_storage_connector.py +247 -0
  69. scaler/io/sync_subscriber.py +83 -0
  70. scaler/io/utility.py +80 -0
  71. scaler/io/ymq/__init__.py +0 -0
  72. scaler/io/ymq/_ymq.pyi +95 -0
  73. scaler/io/ymq/ymq.py +138 -0
  74. scaler/io/ymq_async_object_storage_connector.py +184 -0
  75. scaler/io/ymq_sync_object_storage_connector.py +184 -0
  76. scaler/object_storage/__init__.py +0 -0
  77. scaler/protocol/__init__.py +0 -0
  78. scaler/protocol/capnp/__init__.py +0 -0
  79. scaler/protocol/capnp/_python.py +6 -0
  80. scaler/protocol/capnp/common.capnp +68 -0
  81. scaler/protocol/capnp/message.capnp +218 -0
  82. scaler/protocol/capnp/object_storage.capnp +57 -0
  83. scaler/protocol/capnp/status.capnp +73 -0
  84. scaler/protocol/introduction.md +105 -0
  85. scaler/protocol/python/__init__.py +0 -0
  86. scaler/protocol/python/common.py +140 -0
  87. scaler/protocol/python/message.py +751 -0
  88. scaler/protocol/python/mixins.py +13 -0
  89. scaler/protocol/python/object_storage.py +118 -0
  90. scaler/protocol/python/status.py +279 -0
  91. scaler/protocol/worker.md +228 -0
  92. scaler/scheduler/__init__.py +0 -0
  93. scaler/scheduler/allocate_policy/__init__.py +0 -0
  94. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  95. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  96. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  97. scaler/scheduler/allocate_policy/mixins.py +55 -0
  98. scaler/scheduler/controllers/__init__.py +0 -0
  99. scaler/scheduler/controllers/balance_controller.py +65 -0
  100. scaler/scheduler/controllers/client_controller.py +131 -0
  101. scaler/scheduler/controllers/config_controller.py +31 -0
  102. scaler/scheduler/controllers/graph_controller.py +424 -0
  103. scaler/scheduler/controllers/information_controller.py +81 -0
  104. scaler/scheduler/controllers/mixins.py +194 -0
  105. scaler/scheduler/controllers/object_controller.py +147 -0
  106. scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
  107. scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
  108. scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
  109. scaler/scheduler/controllers/scaling_policies/null.py +14 -0
  110. scaler/scheduler/controllers/scaling_policies/types.py +9 -0
  111. scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
  112. scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
  113. scaler/scheduler/controllers/task_controller.py +376 -0
  114. scaler/scheduler/controllers/worker_controller.py +169 -0
  115. scaler/scheduler/object_usage/__init__.py +0 -0
  116. scaler/scheduler/object_usage/object_tracker.py +131 -0
  117. scaler/scheduler/scheduler.py +251 -0
  118. scaler/scheduler/task/__init__.py +0 -0
  119. scaler/scheduler/task/task_state_machine.py +92 -0
  120. scaler/scheduler/task/task_state_manager.py +61 -0
  121. scaler/ui/__init__.py +0 -0
  122. scaler/ui/constants.py +9 -0
  123. scaler/ui/live_display.py +147 -0
  124. scaler/ui/memory_window.py +146 -0
  125. scaler/ui/setting_page.py +40 -0
  126. scaler/ui/task_graph.py +832 -0
  127. scaler/ui/task_log.py +107 -0
  128. scaler/ui/utility.py +66 -0
  129. scaler/ui/webui.py +147 -0
  130. scaler/ui/worker_processors.py +104 -0
  131. scaler/utility/__init__.py +0 -0
  132. scaler/utility/debug.py +19 -0
  133. scaler/utility/event_list.py +63 -0
  134. scaler/utility/event_loop.py +58 -0
  135. scaler/utility/exceptions.py +42 -0
  136. scaler/utility/formatter.py +44 -0
  137. scaler/utility/graph/__init__.py +0 -0
  138. scaler/utility/graph/optimization.py +27 -0
  139. scaler/utility/graph/topological_sorter.py +11 -0
  140. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  141. scaler/utility/identifiers.py +107 -0
  142. scaler/utility/logging/__init__.py +0 -0
  143. scaler/utility/logging/decorators.py +25 -0
  144. scaler/utility/logging/scoped_logger.py +33 -0
  145. scaler/utility/logging/utility.py +183 -0
  146. scaler/utility/many_to_many_dict.py +123 -0
  147. scaler/utility/metadata/__init__.py +0 -0
  148. scaler/utility/metadata/profile_result.py +31 -0
  149. scaler/utility/metadata/task_flags.py +30 -0
  150. scaler/utility/mixins.py +13 -0
  151. scaler/utility/network_util.py +7 -0
  152. scaler/utility/one_to_many_dict.py +72 -0
  153. scaler/utility/queues/__init__.py +0 -0
  154. scaler/utility/queues/async_indexed_queue.py +37 -0
  155. scaler/utility/queues/async_priority_queue.py +70 -0
  156. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  157. scaler/utility/queues/indexed_queue.py +114 -0
  158. scaler/utility/serialization.py +9 -0
  159. scaler/version.txt +1 -0
  160. scaler/worker/__init__.py +0 -0
  161. scaler/worker/agent/__init__.py +0 -0
  162. scaler/worker/agent/heartbeat_manager.py +107 -0
  163. scaler/worker/agent/mixins.py +137 -0
  164. scaler/worker/agent/processor/__init__.py +0 -0
  165. scaler/worker/agent/processor/object_cache.py +107 -0
  166. scaler/worker/agent/processor/processor.py +285 -0
  167. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  168. scaler/worker/agent/processor_holder.py +147 -0
  169. scaler/worker/agent/processor_manager.py +369 -0
  170. scaler/worker/agent/profiling_manager.py +109 -0
  171. scaler/worker/agent/task_manager.py +150 -0
  172. scaler/worker/agent/timeout_manager.py +19 -0
  173. scaler/worker/preload.py +84 -0
  174. scaler/worker/worker.py +265 -0
  175. scaler/worker_adapter/__init__.py +0 -0
  176. scaler/worker_adapter/common.py +26 -0
  177. scaler/worker_adapter/ecs.py +269 -0
  178. scaler/worker_adapter/native.py +155 -0
  179. scaler/worker_adapter/symphony/__init__.py +0 -0
  180. scaler/worker_adapter/symphony/callback.py +45 -0
  181. scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
  182. scaler/worker_adapter/symphony/message.py +24 -0
  183. scaler/worker_adapter/symphony/task_manager.py +289 -0
  184. scaler/worker_adapter/symphony/worker.py +204 -0
  185. scaler/worker_adapter/symphony/worker_adapter.py +139 -0
  186. src/scaler/io/ymq/_ymq.so +0 -0
  187. src/scaler/object_storage/object_storage_server.so +0 -0
scaler/client/client.py
@@ -0,0 +1,658 @@
+ import dataclasses
+ import functools
+ import logging
+ import threading
+ import uuid
+ from collections import Counter
+ from inspect import signature
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
+
+ import zmq
+
+ from scaler.client.agent.client_agent import ClientAgent
+ from scaler.client.agent.future_manager import ClientFutureManager
+ from scaler.client.future import ScalerFuture
+ from scaler.client.object_buffer import ObjectBuffer
+ from scaler.client.object_reference import ObjectReference
+ from scaler.client.serializer.default import DefaultSerializer
+ from scaler.client.serializer.mixins import Serializer
+ from scaler.config.defaults import DEFAULT_CLIENT_TIMEOUT_SECONDS, DEFAULT_HEARTBEAT_INTERVAL_SECONDS
+ from scaler.config.types.zmq import ZMQConfig, ZMQType
+ from scaler.io.mixins import SyncConnector, SyncObjectStorageConnector
+ from scaler.io.sync_connector import ZMQSyncConnector
+ from scaler.io.utility import create_sync_object_storage_connector
+ from scaler.protocol.python.message import ClientDisconnect, ClientShutdownResponse, GraphTask, Task
+ from scaler.utility.exceptions import ClientQuitException, MissingObjects
+ from scaler.utility.graph.optimization import cull_graph
+ from scaler.utility.graph.topological_sorter import TopologicalSorter
+ from scaler.utility.identifiers import ClientID, ObjectID, TaskID
+ from scaler.utility.metadata.profile_result import ProfileResult
+ from scaler.utility.metadata.task_flags import TaskFlags, retrieve_task_flags_from_task
+ from scaler.worker.agent.processor.processor import Processor
+
+
+ @dataclasses.dataclass
+ class _CallNode:
+     func: Callable
+     args: Tuple[str, ...]
+
+     def __post_init__(self):
+         if not callable(self.func):
+             raise TypeError(f"the first item of the tuple must be function, get {self.func}")
+
+         if not isinstance(self.args, tuple):
+             raise TypeError(f"arguments must be tuple, get {self.args}")
+
+         for arg in self.args:
+             if not isinstance(arg, str):
+                 raise TypeError(f"argument `{arg}` must be a string and the string has to be in the graph")
+
+
+ class Client:
+     def __init__(
+         self,
+         address: Optional[str] = None,
+         profiling: bool = False,
+         timeout_seconds: int = DEFAULT_CLIENT_TIMEOUT_SECONDS,
+         heartbeat_interval_seconds: int = DEFAULT_HEARTBEAT_INTERVAL_SECONDS,
+         serializer: Serializer = DefaultSerializer(),
+         stream_output: bool = False,
+     ):
+         """
+         The Scaler Client used to send tasks to a scheduler.
+
+         :param address: Address of Scheduler to submit work to. If None, will attempt to auto-detect
+             when running inside a worker context.
+         :type address: Optional[str]
+         :param profiling: If True, the returned futures will have the `task_duration()` property enabled.
+         :type profiling: bool
+         :param timeout_seconds: Seconds until heartbeat times out
+         :type timeout_seconds: int
+         :param heartbeat_interval_seconds: Frequency of heartbeat to scheduler in seconds
+         :type heartbeat_interval_seconds: int
+         :param stream_output: If True, stdout/stderr will be streamed to client during task execution
+         :type stream_output: bool
+         """
+         address = self._resolve_scheduler_address(address)
+         self.__initialize__(address, profiling, timeout_seconds, heartbeat_interval_seconds, serializer, stream_output)
+
+     def __initialize__(
+         self,
+         address: str,
+         profiling: bool,
+         timeout_seconds: int,
+         heartbeat_interval_seconds: int,
+         serializer: Serializer = DefaultSerializer(),
+         stream_output: bool = False,
+     ):
+         self._serializer = serializer
+
+         self._profiling = profiling
+         self._stream_output = stream_output
+         self._identity = ClientID.generate_client_id()
+
+         self._client_agent_address = ZMQConfig(ZMQType.inproc, host=f"scaler_client_{uuid.uuid4().hex}")
+         self._scheduler_address = ZMQConfig.from_string(address)
+         self._timeout_seconds = timeout_seconds
+         self._heartbeat_interval_seconds = heartbeat_interval_seconds
+
+         self._stop_event = threading.Event()
+         self._context = zmq.Context()
+         self._connector_agent: SyncConnector = ZMQSyncConnector(
+             context=self._context, socket_type=zmq.PAIR, address=self._client_agent_address, identity=self._identity
+         )
+
+         self._future_manager = ClientFutureManager(self._serializer)
+         self._agent = ClientAgent(
+             identity=self._identity,
+             client_agent_address=self._client_agent_address,
+             scheduler_address=ZMQConfig.from_string(address),
+             context=self._context,
+             future_manager=self._future_manager,
+             stop_event=self._stop_event,
+             timeout_seconds=self._timeout_seconds,
+             heartbeat_interval_seconds=self._heartbeat_interval_seconds,
+             serializer=self._serializer,
+         )
+         self._agent.start()
+
+         logging.info(f"ScalerClient: connect to scheduler at {self._scheduler_address}")
+
+         # Blocks until the agent receives the object storage address
+         self._object_storage_address = self._agent.get_object_storage_address()
+
+         logging.info(f"ScalerClient: connect to object storage at {self._object_storage_address}")
+         self._connector_storage: SyncObjectStorageConnector = create_sync_object_storage_connector(
+             self._object_storage_address.host, self._object_storage_address.port
+         )
+
+         self._object_buffer = ObjectBuffer(
+             self._identity, self._serializer, self._connector_agent, self._connector_storage
+         )
+         self._future_factory = functools.partial(
+             ScalerFuture,
+             serializer=self._serializer,
+             connector_agent=self._connector_agent,
+             connector_storage=self._connector_storage,
+         )
+
+     @property
+     def identity(self) -> ClientID:
+         return self._identity
+
+     def __del__(self):
+         self.disconnect()
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         self.disconnect()
+
+     def __getstate__(self) -> dict:
+         """
+         Serializes the client object's state.
+
+         Client serialization is useful when a client reference is used within a remote task:
+
+
+         .. code:: python
+
+             client = Client(...)
+
+             def fibonacci(client: Client, n: int):
+                 if n == 0:
+                     return 0
+                 elif n == 1:
+                     return 1
+                 else:
+                     a = client.submit(fibonacci, client, n - 1)
+                     b = client.submit(fibonacci, client, n - 2)
+                     return a.result() + b.result()
+
+             print(client.submit(fibonacci, client, 7).result())
+
+
+         When the client is serialized, only its address parameters are saved. When deserialized, a new client
+         object connecting to the same scheduler and remote logger will be instantiated.
+         """
+
+         return {
+             "address": self._scheduler_address.to_address(),
+             "profiling": self._profiling,
+             "stream_output": self._stream_output,
+             "timeout_seconds": self._timeout_seconds,
+             "heartbeat_interval_seconds": self._heartbeat_interval_seconds,
+         }
+
+     def __setstate__(self, state: dict) -> None:
+         # TODO: fix copy the serializer
+         self.__initialize__(
+             address=state["address"],
+             profiling=state["profiling"],
+             stream_output=state["stream_output"],
+             timeout_seconds=state["timeout_seconds"],
+             heartbeat_interval_seconds=state["heartbeat_interval_seconds"],
+         )
+
+     def submit(self, fn: Callable, *args, **kwargs) -> ScalerFuture:
+         """
+         Submit a single task (function with arguments) to the scheduler, and return a future.
+
+         See `submit_verbose()` for additional parameters.
+
+         :param fn: function to be executed remotely
+         :type fn: Callable
+         :param args: positional arguments to be passed to the function
+         :param kwargs: keyword arguments to be passed to the function
+         :return: future of the submitted task
+         :rtype: ScalerFuture
+         """
+
+         return self.submit_verbose(fn, args, kwargs)
+
+     def submit_verbose(
+         self, fn: Callable, args: Tuple[Any, ...], kwargs: Dict[str, Any], capabilities: Optional[Dict[str, int]] = None
+     ) -> ScalerFuture:
+         """
+         Submit a single task (function with arguments) to the scheduler, and return a future, optionally routing
+         the task to specific workers.
+
+         :param fn: function to be executed remotely
+         :type fn: Callable
+         :param args: positional arguments to be passed to the function
+         :param kwargs: keyword arguments to be passed to the function
+         :param capabilities: capabilities used for routing the tasks, e.g. `{"gpu": 2, "memory": 1_000_000_000}`.
+         :type capabilities: Optional[Dict[str, int]]
+         :return: future of the submitted task
+         :rtype: ScalerFuture
+         """
+
+         self.__assert_client_not_stopped()
+
+         function_object_id = self._object_buffer.buffer_send_function(fn).object_id
+         all_args = Client.__convert_kwargs_to_args(fn, args, kwargs)
+
+         task, future = self.__submit(function_object_id, all_args, delayed=True, capabilities=capabilities)
+
+         self._object_buffer.commit_send_objects()
+         self._connector_agent.send(task)
+         return future
+
+     def map(
+         self, fn: Callable, iterable: Iterable[Tuple[Any, ...]], capabilities: Optional[Dict[str, int]] = None
+     ) -> List[Any]:
+         if not all(isinstance(args, (tuple, list)) for args in iterable):
+             raise TypeError("iterable should be list of arguments(list or tuple-like) of function")
+
+         self.__assert_client_not_stopped()
+
+         function_object_id = self._object_buffer.buffer_send_function(fn).object_id
+         tasks, futures = zip(
+             *[self.__submit(function_object_id, args, delayed=False, capabilities=capabilities) for args in iterable]
+         )
+
+         self._object_buffer.commit_send_objects()
+         for task in tasks:
+             self._connector_agent.send(task)
+
+         try:
+             results = [fut.result() for fut in futures]
+         except Exception as e:
+             logging.exception(f"error happened when do scaler client.map:\n{e}")
+             self.disconnect()
+             raise e
+
+         return results
+
+     def get(
+         self,
+         graph: Dict[str, Union[Any, Tuple[Union[Callable, str], ...]]],
+         keys: List[str],
+         block: bool = True,
+         capabilities: Optional[Dict[str, int]] = None,
+     ) -> Dict[str, Union[Any, ScalerFuture]]:
+         """
+         .. code-block:: python
+             :linenos:
+             graph = {
+                 "a": 1,
+                 "b": 2,
+                 "c": (inc, "a"),
+                 "d": (inc, "b"),
+                 "e": (add, "c", "d")
+             }
+
+         :param graph: dictionary representation of the task graph
+         :type graph: Dict[str, Union[Any, Tuple[Union[Callable, str], ...]]]
+         :param keys: list of keys to retrieve results for from the computed graph
+         :type keys: List[str]
+         :param block: if True, it will directly return a dictionary that maps keys to results
+         :param capabilities: capabilities used for routing the tasks, e.g. `{"gpu": 2, "memory": 1_000_000_000}`.
+         :type capabilities: Optional[Dict[str, int]]
+         :return: dictionary mapping keys to futures, or to results if block=True is specified
+         :rtype: Dict[str, Union[Any, ScalerFuture]]
+         """
+
+         self.__assert_client_not_stopped()
+
+         capabilities = capabilities or {}
+
+         graph = cull_graph(graph, keys)
+
+         node_name_to_argument, call_graph = self.__split_data_and_graph(graph)
+         self.__check_graph(node_name_to_argument, call_graph, keys)
+
+         graph_task, compute_futures, finished_futures = self.__construct_graph(
+             node_name_to_argument, call_graph, keys, block, capabilities
+         )
+         self._object_buffer.commit_send_objects()
+         self._connector_agent.send(graph_task)
+
+         self._future_manager.add_future(
+             self._future_factory(
+                 task=Task.new_msg(
+                     task_id=graph_task.task_id,
+                     source=self._identity,
+                     metadata=b"",
+                     func_object_id=None,
+                     function_args=[],
+                     capabilities=capabilities,
+                 ),
+                 is_delayed=not block,
+                 group_task_id=graph_task.task_id,
+             )
+         )
+         for future in compute_futures.values():
+             self._future_manager.add_future(future)
+
+         # preserve the future insertion order based on the inputted keys
+         futures = {}
+         for key in keys:
+             if key in compute_futures:
+                 futures[key] = compute_futures[key]
+             else:
+                 futures[key] = finished_futures[key]
+
+         if not block:
+             # just return futures
+             return futures
+
+         try:
+             results = {k: v.result() for k, v in futures.items()}
+         except Exception as e:
+             logging.exception(f"error happened when do scaler client.get:\n{e}")
+             self.disconnect()
+             raise e
+
+         return results
+
+     def send_object(self, obj: Any, name: Optional[str] = None) -> ObjectReference:
+         """
+         send an object to the scheduler; this can be used to cache very large data on the scheduler and reuse it in
+         multiple tasks
+
+         :param obj: object to send, it will be serialized and sent to the scheduler
+         :type obj: Any
+         :param name: give a name to the cached argument
+         :type name: Optional[str]
+         :return: object reference
+         :rtype: ObjectReference
+         """
+
+         self.__assert_client_not_stopped()
+
+         cache = self._object_buffer.buffer_send_object(obj, name)
+         return ObjectReference(cache.object_name, len(cache.object_payload), cache.object_id)
+
+     def clear(self):
+         """
+         clear all resources used by the client; this will cancel all running futures and invalidate all existing
+         object references
+         """
+
+         # It's important to ensure that all running futures are cancelled/finished before clearing objects, or else
+         # we might end up with tasks indefinitely waiting on no longer existing objects.
+         self._future_manager.cancel_all_futures()
+
+         self._object_buffer.clear()
+
+     def disconnect(self):
+         """
+         disconnect from the connected scheduler; this will not shut down the scheduler
+         """
+
+         # Handle case where client wasn't fully initialized
+         if not hasattr(self, "_stop_event"):
+             return
+
+         if self._stop_event.is_set():
+             self.__destroy()
+             return
+
+         logging.info(f"ScalerClient: disconnect from {self._scheduler_address.to_address()}")
+
+         self._future_manager.cancel_all_futures()
+
+         self._connector_agent.send(ClientDisconnect.new_msg(ClientDisconnect.DisconnectType.Disconnect))
+
+         self.__destroy()
+
+     def __receive_shutdown_response(self):
+         message: Optional[ClientShutdownResponse] = None
+         while not isinstance(message, ClientShutdownResponse):
+             message = self._connector_agent.receive()
+
+         if not message.accepted:
+             raise ValueError("Scheduler is in protected mode. Can't shutdown")
+
+     def shutdown(self):
+         """
+         shut down all workers connected to the scheduler this client connects to; this will cancel all other
+         clients' ongoing tasks. Be aware that shutdown might not succeed if the scheduler is configured in protected
+         mode, in which case neither the scheduler nor the workers will be shut down
+         """
+
+         if not self._agent.is_alive():
+             self.__destroy()
+             return
+
+         logging.info(f"ScalerClient: request shutdown for {self._scheduler_address.to_address()}")
+
+         self._future_manager.cancel_all_futures()
+
+         self._connector_agent.send(ClientDisconnect.new_msg(ClientDisconnect.DisconnectType.Shutdown))
+         try:
+             self.__receive_shutdown_response()
+         finally:
+             self.__destroy()
+
+     def __submit(
+         self,
+         function_object_id: ObjectID,
+         args: Tuple[Any, ...],
+         delayed: bool,
+         capabilities: Optional[Dict[str, int]] = None,
+     ) -> Tuple[Task, ScalerFuture]:
+         task_id = TaskID.generate_task_id()
+
+         capabilities = capabilities or {}
+
+         function_args: List[Union[ObjectID, TaskID]] = []
+         for arg in args:
+             if isinstance(arg, ObjectReference):
+                 if not self._object_buffer.is_valid_object_id(arg.object_id):
+                     raise MissingObjects(f"unknown object: {arg.object_id!r}.")
+
+                 function_args.append(arg.object_id)
+             else:
+                 function_args.append(self._object_buffer.buffer_send_object(arg).object_id)
+
+         task_flags_bytes = self.__get_task_flags().serialize()
+
+         task = Task.new_msg(
+             task_id=task_id,
+             source=self._identity,
+             metadata=task_flags_bytes,
+             func_object_id=function_object_id,
+             function_args=function_args,
+             capabilities=capabilities,
+         )
+
+         future = self._future_factory(task=task, is_delayed=delayed, group_task_id=None)
+         self._future_manager.add_future(future)
+         return task, future
+
+     @staticmethod
+     def __convert_kwargs_to_args(fn: Callable, args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> Tuple[Any, ...]:
+         all_params = [p for p in signature(fn).parameters.values()]
+
+         params = [p for p in all_params if p.kind in {p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD}]
+
+         if len(args) >= len(params):
+             return args
+
+         number_of_required = len([p for p in params if p.default is p.empty])
+
+         args_list = list(args)
+         kwargs = kwargs.copy()
+         kwargs.update({p.name: p.default for p in all_params if p.kind == p.KEYWORD_ONLY if p.default != p.empty})
+
+         for p in params[len(args_list) : number_of_required]:
+             try:
+                 args_list.append(kwargs.pop(p.name))
+             except KeyError:
+                 missing = tuple(p.name for p in params[len(args_list) : number_of_required])
+                 raise TypeError(f"{fn} missing {len(missing)} arguments: {missing}")
+
+         for p in params[len(args_list) :]:
+             args_list.append(kwargs.pop(p.name, p.default))
+
+         return tuple(args_list)
+
+     def __split_data_and_graph(
+         self, graph: Dict[str, Union[Any, Tuple[Union[Callable, str], ...]]]
+     ) -> Tuple[Dict[str, Tuple[ObjectID, Any]], Dict[str, _CallNode]]:
+         call_graph = {}
+         node_name_to_argument: Dict[str, Tuple[ObjectID, Union[Any, Tuple[Union[Callable, Any], ...]]]] = dict()
+
+         for node_name, node in graph.items():
+             if isinstance(node, tuple) and len(node) > 0 and callable(node[0]):
+                 call_graph[node_name] = _CallNode(func=node[0], args=node[1:])  # type: ignore[arg-type]
+                 continue
+
+             if isinstance(node, ObjectReference):
+                 object_id = node.object_id
+             else:
+                 object_id = self._object_buffer.buffer_send_object(node, name=node_name).object_id
+
+             node_name_to_argument[node_name] = (object_id, node)
+
+         return node_name_to_argument, call_graph
+
+     @staticmethod
+     def __check_graph(
+         node_to_argument: Dict[str, Tuple[ObjectID, Any]], call_graph: Dict[str, _CallNode], keys: List[str]
+     ):
+         duplicate_keys = [key for key, count in dict(Counter(keys)).items() if count > 1]
+         if duplicate_keys:
+             raise KeyError(f"duplicate key detected in argument keys: {duplicate_keys}")
+
+         # sanity check graph
+         for key in keys:
+             if key not in call_graph and key not in node_to_argument:
+                 raise KeyError(f"key {key} has to be in graph")
+
+         sorter: TopologicalSorter[str] = TopologicalSorter()
+         for node_name, node in call_graph.items():
+             for arg in node.args:
+                 if arg not in node_to_argument and arg not in call_graph:
+                     raise KeyError(f"argument {arg} in node '{node_name}': {node} is not defined in graph")
+
+             sorter.add(node_name, *node.args)
+
+         # check cyclic dependencies
+         sorter.prepare()
+
+     def __construct_graph(
+         self,
+         node_name_to_arguments: Dict[str, Tuple[ObjectID, Any]],
+         call_graph: Dict[str, _CallNode],
+         keys: List[str],
+         block: bool,
+         capabilities: Dict[str, int],
+     ) -> Tuple[GraphTask, Dict[str, ScalerFuture], Dict[str, ScalerFuture]]:
+         graph_task_id = TaskID.generate_task_id()
+
+         node_name_to_task_id = {node_name: TaskID.generate_task_id() for node_name in call_graph.keys()}
+
+         task_flags_bytes = self.__get_task_flags().serialize()
+
+         task_id_to_tasks = dict()
+
+         for node_name, node in call_graph.items():
+             task_id = node_name_to_task_id[node_name]
+             function_cache = self._object_buffer.buffer_send_function(node.func)
+
+             arguments: List[Union[TaskID, ObjectID]] = []
+             for arg in node.args:
+                 assert arg in call_graph or arg in node_name_to_arguments
+
+                 if arg in call_graph:
+                     arguments.append(TaskID(node_name_to_task_id[arg]))
+                 elif arg in node_name_to_arguments:
+                     argument, _ = node_name_to_arguments[arg]
+                     arguments.append(argument)
+                 else:
+                     raise ValueError("Not possible")
+
+             task_id_to_tasks[task_id] = Task.new_msg(
+                 task_id=task_id,
+                 source=self._identity,
+                 metadata=task_flags_bytes,
+                 func_object_id=function_cache.object_id,
+                 function_args=arguments,
+                 capabilities=capabilities,
+             )
+
+         result_task_ids = [node_name_to_task_id[key] for key in keys if key in call_graph]
+         graph_task = GraphTask.new_msg(graph_task_id, self._identity, result_task_ids, list(task_id_to_tasks.values()))
+
+         compute_futures = {}
+         ready_futures = {}
+         for key in keys:
+             if key in call_graph:
+                 compute_futures[key] = self._future_factory(
+                     task=task_id_to_tasks[node_name_to_task_id[key]], is_delayed=not block, group_task_id=graph_task_id
+                 )
+
+             elif key in node_name_to_arguments:
+                 argument, data = node_name_to_arguments[key]
+                 future: ScalerFuture = self._future_factory(
+                     task=Task.new_msg(
+                         task_id=TaskID.generate_task_id(),
+                         source=self._identity,
+                         metadata=b"",
+                         func_object_id=None,
+                         function_args=[],
+                         capabilities={},
+                     ),
+                     is_delayed=False,
+                     group_task_id=graph_task_id,
+                 )
+                 future.set_result(data, ProfileResult())
+                 ready_futures[key] = future
+
+             else:
+                 raise ValueError(f"cannot find {key=} in graph")
+
+         return graph_task, compute_futures, ready_futures
+
+     def __get_task_flags(self) -> TaskFlags:
+         parent_task_priority = self.__get_parent_task_priority()
+
+         if parent_task_priority is not None:
+             task_priority = parent_task_priority + 1
+         else:
+             task_priority = 0
+
+         return TaskFlags(profiling=self._profiling, priority=task_priority, stream_output=self._stream_output)
+
+     def __assert_client_not_stopped(self):
+         if self._stop_event.is_set():
+             raise ClientQuitException("client is already stopped.")
+
+     def __destroy(self):
+         self._agent.join()
+         self._context.destroy(linger=1)
+
+     @staticmethod
+     def __get_parent_task_priority() -> Optional[int]:
+         """If the client is running inside a Scaler processor, returns the priority of the associated task."""
+
+         current_processor = Processor.get_current_processor()
+
+         if current_processor is None:
+             return None
+
+         current_task = current_processor.current_task()
+         assert current_task is not None
+
+         return retrieve_task_flags_from_task(current_task).priority
+
+     def _resolve_scheduler_address(self, address: Optional[str]) -> str:
+         """Resolve the scheduler address based on the provided address and worker context."""
+         # Provided address always takes precedence
+         if address is not None:
+             return address
+
+         # No address provided, check if we're running inside a worker context
+         current_processor = Processor.get_current_processor()
+         if current_processor is None:
+             raise ValueError(
+                 "No scheduler address provided and not running inside a worker context. "
+                 "Please provide a scheduler address when creating the Client outside of a worker."
+             )
+
+         # Return the scheduler address from the current processor
+         return current_processor.scheduler_address().to_address()
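
For orientation, the file above defines the user-facing Client API (submit, map, get, send_object, disconnect/shutdown). The following is a minimal usage sketch based only on the docstrings shown in this diff; the scheduler address, the top-level `from scaler import Client` import, and the inc/add helpers are illustrative assumptions, not part of the package.

# Usage sketch (assumptions: a scheduler reachable at tcp://127.0.0.1:2345, a top-level
# `Client` re-export in scaler/__init__.py; inc/add are placeholder helper functions).
from scaler import Client


def inc(x: int) -> int:
    return x + 1


def add(x: int, y: int) -> int:
    return x + y


with Client(address="tcp://127.0.0.1:2345") as client:  # address string is parsed by ZMQConfig.from_string
    # submit() returns a ScalerFuture for a single task
    print(client.submit(inc, 41).result())  # 42

    # map() fans the same function out over many argument tuples and blocks for the results
    print(client.map(add, [(1, 2), (3, 4)]))  # [3, 7]

    # get() computes a task graph, as documented in Client.get()
    graph = {"a": 1, "b": 2, "c": (inc, "a"), "d": (inc, "b"), "e": (add, "c", "d")}
    print(client.get(graph, keys=["e"]))  # {"e": 5}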