opengris-scaler 1.12.28 (cp313-cp313-musllinux_1_2_x86_64.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of opengris-scaler might be problematic.

Files changed (187)
  1. opengris_scaler-1.12.28.dist-info/METADATA +728 -0
  2. opengris_scaler-1.12.28.dist-info/RECORD +187 -0
  3. opengris_scaler-1.12.28.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.28.dist-info/entry_points.txt +10 -0
  5. opengris_scaler-1.12.28.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.28.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.28.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/__init__.py +14 -0
  13. scaler/about.py +5 -0
  14. scaler/client/__init__.py +0 -0
  15. scaler/client/agent/__init__.py +0 -0
  16. scaler/client/agent/client_agent.py +210 -0
  17. scaler/client/agent/disconnect_manager.py +27 -0
  18. scaler/client/agent/future_manager.py +112 -0
  19. scaler/client/agent/heartbeat_manager.py +74 -0
  20. scaler/client/agent/mixins.py +89 -0
  21. scaler/client/agent/object_manager.py +98 -0
  22. scaler/client/agent/task_manager.py +64 -0
  23. scaler/client/client.py +658 -0
  24. scaler/client/future.py +252 -0
  25. scaler/client/object_buffer.py +129 -0
  26. scaler/client/object_reference.py +25 -0
  27. scaler/client/serializer/__init__.py +0 -0
  28. scaler/client/serializer/default.py +16 -0
  29. scaler/client/serializer/mixins.py +38 -0
  30. scaler/cluster/__init__.py +0 -0
  31. scaler/cluster/cluster.py +115 -0
  32. scaler/cluster/combo.py +150 -0
  33. scaler/cluster/object_storage_server.py +45 -0
  34. scaler/cluster/scheduler.py +86 -0
  35. scaler/config/__init__.py +0 -0
  36. scaler/config/defaults.py +94 -0
  37. scaler/config/loader.py +96 -0
  38. scaler/config/mixins.py +20 -0
  39. scaler/config/section/__init__.py +0 -0
  40. scaler/config/section/cluster.py +55 -0
  41. scaler/config/section/ecs_worker_adapter.py +85 -0
  42. scaler/config/section/native_worker_adapter.py +43 -0
  43. scaler/config/section/object_storage_server.py +8 -0
  44. scaler/config/section/scheduler.py +54 -0
  45. scaler/config/section/symphony_worker_adapter.py +47 -0
  46. scaler/config/section/top.py +13 -0
  47. scaler/config/section/webui.py +21 -0
  48. scaler/config/types/__init__.py +0 -0
  49. scaler/config/types/network_backend.py +12 -0
  50. scaler/config/types/object_storage_server.py +45 -0
  51. scaler/config/types/worker.py +62 -0
  52. scaler/config/types/zmq.py +83 -0
  53. scaler/entry_points/__init__.py +0 -0
  54. scaler/entry_points/cluster.py +133 -0
  55. scaler/entry_points/object_storage_server.py +45 -0
  56. scaler/entry_points/scheduler.py +144 -0
  57. scaler/entry_points/top.py +286 -0
  58. scaler/entry_points/webui.py +48 -0
  59. scaler/entry_points/worker_adapter_ecs.py +191 -0
  60. scaler/entry_points/worker_adapter_native.py +137 -0
  61. scaler/entry_points/worker_adapter_symphony.py +98 -0
  62. scaler/io/__init__.py +0 -0
  63. scaler/io/async_binder.py +89 -0
  64. scaler/io/async_connector.py +95 -0
  65. scaler/io/async_object_storage_connector.py +225 -0
  66. scaler/io/mixins.py +154 -0
  67. scaler/io/sync_connector.py +68 -0
  68. scaler/io/sync_object_storage_connector.py +247 -0
  69. scaler/io/sync_subscriber.py +83 -0
  70. scaler/io/utility.py +80 -0
  71. scaler/io/ymq/__init__.py +0 -0
  72. scaler/io/ymq/_ymq.pyi +95 -0
  73. scaler/io/ymq/ymq.py +138 -0
  74. scaler/io/ymq_async_object_storage_connector.py +184 -0
  75. scaler/io/ymq_sync_object_storage_connector.py +184 -0
  76. scaler/object_storage/__init__.py +0 -0
  77. scaler/protocol/__init__.py +0 -0
  78. scaler/protocol/capnp/__init__.py +0 -0
  79. scaler/protocol/capnp/_python.py +6 -0
  80. scaler/protocol/capnp/common.capnp +68 -0
  81. scaler/protocol/capnp/message.capnp +218 -0
  82. scaler/protocol/capnp/object_storage.capnp +57 -0
  83. scaler/protocol/capnp/status.capnp +73 -0
  84. scaler/protocol/introduction.md +105 -0
  85. scaler/protocol/python/__init__.py +0 -0
  86. scaler/protocol/python/common.py +140 -0
  87. scaler/protocol/python/message.py +751 -0
  88. scaler/protocol/python/mixins.py +13 -0
  89. scaler/protocol/python/object_storage.py +118 -0
  90. scaler/protocol/python/status.py +279 -0
  91. scaler/protocol/worker.md +228 -0
  92. scaler/scheduler/__init__.py +0 -0
  93. scaler/scheduler/allocate_policy/__init__.py +0 -0
  94. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  95. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  96. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  97. scaler/scheduler/allocate_policy/mixins.py +55 -0
  98. scaler/scheduler/controllers/__init__.py +0 -0
  99. scaler/scheduler/controllers/balance_controller.py +65 -0
  100. scaler/scheduler/controllers/client_controller.py +131 -0
  101. scaler/scheduler/controllers/config_controller.py +31 -0
  102. scaler/scheduler/controllers/graph_controller.py +424 -0
  103. scaler/scheduler/controllers/information_controller.py +81 -0
  104. scaler/scheduler/controllers/mixins.py +194 -0
  105. scaler/scheduler/controllers/object_controller.py +147 -0
  106. scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
  107. scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
  108. scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
  109. scaler/scheduler/controllers/scaling_policies/null.py +14 -0
  110. scaler/scheduler/controllers/scaling_policies/types.py +9 -0
  111. scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
  112. scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
  113. scaler/scheduler/controllers/task_controller.py +376 -0
  114. scaler/scheduler/controllers/worker_controller.py +169 -0
  115. scaler/scheduler/object_usage/__init__.py +0 -0
  116. scaler/scheduler/object_usage/object_tracker.py +131 -0
  117. scaler/scheduler/scheduler.py +251 -0
  118. scaler/scheduler/task/__init__.py +0 -0
  119. scaler/scheduler/task/task_state_machine.py +92 -0
  120. scaler/scheduler/task/task_state_manager.py +61 -0
  121. scaler/ui/__init__.py +0 -0
  122. scaler/ui/constants.py +9 -0
  123. scaler/ui/live_display.py +147 -0
  124. scaler/ui/memory_window.py +146 -0
  125. scaler/ui/setting_page.py +40 -0
  126. scaler/ui/task_graph.py +832 -0
  127. scaler/ui/task_log.py +107 -0
  128. scaler/ui/utility.py +66 -0
  129. scaler/ui/webui.py +147 -0
  130. scaler/ui/worker_processors.py +104 -0
  131. scaler/utility/__init__.py +0 -0
  132. scaler/utility/debug.py +19 -0
  133. scaler/utility/event_list.py +63 -0
  134. scaler/utility/event_loop.py +58 -0
  135. scaler/utility/exceptions.py +42 -0
  136. scaler/utility/formatter.py +44 -0
  137. scaler/utility/graph/__init__.py +0 -0
  138. scaler/utility/graph/optimization.py +27 -0
  139. scaler/utility/graph/topological_sorter.py +11 -0
  140. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  141. scaler/utility/identifiers.py +107 -0
  142. scaler/utility/logging/__init__.py +0 -0
  143. scaler/utility/logging/decorators.py +25 -0
  144. scaler/utility/logging/scoped_logger.py +33 -0
  145. scaler/utility/logging/utility.py +183 -0
  146. scaler/utility/many_to_many_dict.py +123 -0
  147. scaler/utility/metadata/__init__.py +0 -0
  148. scaler/utility/metadata/profile_result.py +31 -0
  149. scaler/utility/metadata/task_flags.py +30 -0
  150. scaler/utility/mixins.py +13 -0
  151. scaler/utility/network_util.py +7 -0
  152. scaler/utility/one_to_many_dict.py +72 -0
  153. scaler/utility/queues/__init__.py +0 -0
  154. scaler/utility/queues/async_indexed_queue.py +37 -0
  155. scaler/utility/queues/async_priority_queue.py +70 -0
  156. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  157. scaler/utility/queues/indexed_queue.py +114 -0
  158. scaler/utility/serialization.py +9 -0
  159. scaler/version.txt +1 -0
  160. scaler/worker/__init__.py +0 -0
  161. scaler/worker/agent/__init__.py +0 -0
  162. scaler/worker/agent/heartbeat_manager.py +107 -0
  163. scaler/worker/agent/mixins.py +137 -0
  164. scaler/worker/agent/processor/__init__.py +0 -0
  165. scaler/worker/agent/processor/object_cache.py +107 -0
  166. scaler/worker/agent/processor/processor.py +285 -0
  167. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  168. scaler/worker/agent/processor_holder.py +147 -0
  169. scaler/worker/agent/processor_manager.py +369 -0
  170. scaler/worker/agent/profiling_manager.py +109 -0
  171. scaler/worker/agent/task_manager.py +150 -0
  172. scaler/worker/agent/timeout_manager.py +19 -0
  173. scaler/worker/preload.py +84 -0
  174. scaler/worker/worker.py +265 -0
  175. scaler/worker_adapter/__init__.py +0 -0
  176. scaler/worker_adapter/common.py +26 -0
  177. scaler/worker_adapter/ecs.py +269 -0
  178. scaler/worker_adapter/native.py +155 -0
  179. scaler/worker_adapter/symphony/__init__.py +0 -0
  180. scaler/worker_adapter/symphony/callback.py +45 -0
  181. scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
  182. scaler/worker_adapter/symphony/message.py +24 -0
  183. scaler/worker_adapter/symphony/task_manager.py +289 -0
  184. scaler/worker_adapter/symphony/worker.py +204 -0
  185. scaler/worker_adapter/symphony/worker_adapter.py +139 -0
  186. src/scaler/io/ymq/_ymq.so +0 -0
  187. src/scaler/object_storage/object_storage_server.so +0 -0
scaler/client/future.py
@@ -0,0 +1,252 @@
+ import concurrent.futures
+ from typing import Any, Callable, Optional
+
+ from scaler.client.serializer.mixins import Serializer
+ from scaler.io.mixins import SyncConnector, SyncObjectStorageConnector
+ from scaler.protocol.python.common import TaskState
+ from scaler.protocol.python.message import Task, TaskCancel
+ from scaler.utility.event_list import EventList
+ from scaler.utility.identifiers import ObjectID, TaskID
+ from scaler.utility.metadata.profile_result import ProfileResult
+ from scaler.utility.serialization import deserialize_failure
+
+
+ class ScalerFuture(concurrent.futures.Future):
+     """
+     A drop-in replacement for Python's `concurrent.futures.Future`.
+
+     This class is designed to be compatible with Python's Future API, but with some key differences:
+
+     - Delayed futures (`is_delayed` set to `True`) might not fetch the result data when the future is done.
+       Instead, the result is lazily fetched when `result()` or `exception()` is called, or when a callback or waiter is
+       added. That is, `result()` might temporarily be blocking even if `done()` is `True`.
+
+     - `cancel()` may block until a cancellation confirmation is received from Scaler's scheduler.
+     """
+
+     def __init__(
+         self,
+         task: Task,
+         is_delayed: bool,
+         group_task_id: Optional[TaskID],
+         serializer: Serializer,
+         connector_agent: SyncConnector,
+         connector_storage: SyncObjectStorageConnector,
+     ):
+         super().__init__()
+
+         self._waiters = EventList(self._waiters)  # type: ignore[assignment]
+         self._waiters.add_update_callback(self._on_waiters_updated)  # type: ignore[attr-defined]
+
+         self._task_id: TaskID = task.task_id
+         self._is_delayed: bool = is_delayed
+         self._group_task_id: Optional[TaskID] = group_task_id
+         self._serializer: Serializer = serializer
+         self._connector_agent: SyncConnector = connector_agent
+         self._connector_storage: SyncObjectStorageConnector = connector_storage
+
+         self._result_object_id: Optional[ObjectID] = None
+         self._result_received = False
+         self._task_state: Optional[TaskState] = None
+         self._cancel_requested: bool = False
+
+         self._profiling_info: Optional[ProfileResult] = None
+
+     @property
+     def task_id(self) -> TaskID:
+         return self._task_id
+
+     def profiling_info(self) -> ProfileResult:
+         with self._condition:  # type: ignore[attr-defined]
+             if self._profiling_info is None:
+                 raise ValueError(f"didn't receive profiling info for {self} yet")
+
+             return self._profiling_info
+
+     def set_result_ready(
+         self, object_id: Optional[ObjectID], task_state: TaskState, profile_result: Optional[ProfileResult] = None
+     ) -> None:
+         with self._condition:  # type: ignore[attr-defined]
+             if self.done():
+                 raise concurrent.futures.InvalidStateError(f"invalid future state: {self._state}")
+
+             self._state = "FINISHED"
+
+             self._result_object_id = object_id
+
+             self._task_state = task_state
+
+             if profile_result is not None:
+                 self._profiling_info = profile_result
+
+             # if it's not delayed future, or if there is any listener (waiter or callback), get the result immediately
+             if not self._is_delayed or self._has_result_listeners():
+                 self._get_result_object()
+
+             self._condition.notify_all()  # type: ignore[attr-defined]
+
+     def set_canceled(self):
+         with self._condition:
+             if self.done():
+                 return
+
+             self._state = "CANCELLED_AND_NOTIFIED"
+             self._result_received = True
+             self._cancel_requested = True
+
+             for waiter in self._waiters:
+                 waiter.add_cancelled(self)
+
+             self._condition.notify_all()  # type: ignore[attr-defined]
+
+         self._invoke_callbacks()  # type: ignore[attr-defined]
+
+     def _set_result_or_exception(
+         self,
+         result: Optional[Any] = None,
+         exception: Optional[BaseException] = None,
+         profiling_info: Optional[ProfileResult] = None,
+     ) -> None:
+         with self._condition:  # type: ignore[attr-defined]
+             if self.cancelled():
+                 raise concurrent.futures.InvalidStateError(f"invalid future state: {self._state}")
+
+             if self._result_received:
+                 raise concurrent.futures.InvalidStateError("future already received object data.")
+
+             if profiling_info is not None:
+                 if self._profiling_info is not None:
+                     raise concurrent.futures.InvalidStateError("cannot set profiling info twice.")
+
+                 self._profiling_info = profiling_info
+
+             self._state = "FINISHED"
+             self._result_received = True
+
+             if exception is not None:
+                 assert result is None
+                 self._exception = exception
+                 for waiter in self._waiters:
+                     waiter.add_exception(self)
+             else:
+                 self._result = result
+                 for waiter in self._waiters:
+                     waiter.add_result(self)
+
+             self._condition.notify_all()  # type: ignore[attr-defined]
+
+         self._invoke_callbacks()  # type: ignore[attr-defined]
+
+     def set_result(self, result: Any, profiling_info: Optional[ProfileResult] = None) -> None:
+         self._set_result_or_exception(result=result, profiling_info=profiling_info)
+
+     def set_exception(self, exception: Optional[BaseException], profiling_info: Optional[ProfileResult] = None) -> None:
+         self._set_result_or_exception(exception=exception, profiling_info=profiling_info)
+
+     def result(self, timeout: Optional[float] = None) -> Any:
+         with self._condition:  # type: ignore[attr-defined]
+             self._wait_result_ready(timeout)
+
+             # if it's delayed future, get the result when future.result() gets called
+             if self._is_delayed:
+                 self._get_result_object()
+
+             return super().result()
+
+     def exception(self, timeout: Optional[float] = None) -> Optional[BaseException]:
+         with self._condition:  # type: ignore[attr-defined]
+             self._wait_result_ready(timeout)
+
+             # if it's delayed future, get the result when future.exception() gets called
+             if self._is_delayed:
+                 self._get_result_object()
+
+             return super().exception()
+
+     def cancel(self, timeout: Optional[float] = None) -> bool:
+         with self._condition:  # type: ignore[attr-defined]
+             if self.cancelled():
+                 return True
+
+             if self.done():
+                 return False
+
+             if not self._cancel_requested:
+                 # Send cancellation request to the server
+                 cancel_flags = TaskCancel.TaskCancelFlags(force=True)
+
+                 if self._group_task_id is not None:
+                     self._connector_agent.send(TaskCancel.new_msg(self._group_task_id, flags=cancel_flags))
+                 else:
+                     self._connector_agent.send(TaskCancel.new_msg(self._task_id, flags=cancel_flags))
+
+                 self._cancel_requested = True
+
+             # Wait for the answer from the server, can either be a cancel confirmation, or the results if the task
+             # finished while being canceled.
+             self._wait_result_ready(timeout)
+
+             return self.cancelled()
+
+     def add_done_callback(self, fn: Callable[["ScalerFuture"], Any]) -> None:
+         with self._condition:
+             if self.done():
+                 self._get_result_object()
+             else:
+                 self._done_callbacks.append(fn)  # type: ignore[attr-defined]
+                 return
+
+         try:
+             fn(self)
+         except Exception:
+             concurrent.futures._base.LOGGER.exception(f"exception calling callback for {self!r}")
+             raise
+
+     def _on_waiters_updated(self, waiters: EventList):
+         with self._condition:  # type: ignore[attr-defined]
+             # if it's delayed future, get the result when waiter gets added
+             if self._is_delayed and len(self._waiters) > 0:
+                 self._get_result_object()
+
+     def _has_result_listeners(self) -> bool:
+         return len(self._done_callbacks) > 0 or len(self._waiters) > 0  # type: ignore[attr-defined]
+
+     def _get_result_object(self):
+         with self._condition:  # type: ignore[attr-defined]
+             if self._result_object_id is None or self.cancelled() or self._result_received:
+                 return
+
+             object_bytes = self._connector_storage.get_object(self._result_object_id)
+
+             if self._is_simple_task():
+                 # immediately delete non graph result objects
+                 # TODO: graph task results could also be deleted if these are not required by another task of the graph.
+                 self._connector_storage.delete_object(self._result_object_id)
+
+             if self._task_state == TaskState.Success:
+                 self.set_result(self._serializer.deserialize(object_bytes))
+             elif self._task_state == TaskState.Failed:
+                 self.set_exception(deserialize_failure(object_bytes))
+             else:
+                 raise ValueError(f"unexpected task status: {self._task_state}")
+
+     def _wait_result_ready(self, timeout: Optional[float] = None):
+         """
+         Blocks until the future is done (either successfully, or on failure/cancellation).
+
+         Raises a `TimeoutError` if it blocks more than `timeout` seconds.
+         """
+         if not self.done() and not self._condition.wait(timeout):
+             raise concurrent.futures.TimeoutError()
+
+     def _is_simple_task(self):
+         return self._group_task_id is None and self._task_id is not None
+
+     def __task_type(self) -> str:
+         if self._group_task_id is None:
+             return "SimpleTask"
+
+         if self._group_task_id == self._task_id:
+             return "GraphUmbrellaTask"
+         else:
+             return "GraphSubTask"
scaler/client/object_buffer.py
@@ -0,0 +1,129 @@
+ import dataclasses
+ import pickle
+ from typing import Any, Callable, List, Optional, Set
+
+ import cloudpickle
+
+ from scaler.client.serializer.mixins import Serializer
+ from scaler.io.mixins import SyncConnector, SyncObjectStorageConnector
+ from scaler.protocol.python.common import ObjectMetadata
+ from scaler.protocol.python.message import ObjectInstruction
+ from scaler.utility.identifiers import ClientID, ObjectID
+
+
+ @dataclasses.dataclass
+ class ObjectCache:
+     object_id: ObjectID
+     object_type: ObjectMetadata.ObjectContentType
+     object_name: bytes
+     object_payload: bytes
+
+
+ class ObjectBuffer:
+     def __init__(
+         self,
+         identity: ClientID,
+         serializer: Serializer,
+         connector_agent: SyncConnector,
+         connector_storage: SyncObjectStorageConnector,
+     ):
+         self._identity = identity
+         self._serializer = serializer
+
+         self._connector_agent = connector_agent
+         self._connector_storage = connector_storage
+
+         self._valid_object_ids: Set[ObjectID] = set()
+         self._pending_objects: List[ObjectCache] = list()
+
+         self._serializer_object_id = self.__send_serializer()
+
+     def buffer_send_function(self, fn: Callable) -> ObjectCache:
+         return self.__buffer_send_serialized_object(self.__construct_function(fn))
+
+     def buffer_send_object(self, obj: Any, name: Optional[str] = None) -> ObjectCache:
+         return self.__buffer_send_serialized_object(self.__construct_object(obj, name))
+
+     def commit_send_objects(self):
+         if not self._pending_objects:
+             return
+
+         object_instructions_to_send = [
+             (obj_cache.object_id, obj_cache.object_type, obj_cache.object_name) for obj_cache in self._pending_objects
+         ]
+
+         self._connector_agent.send(
+             ObjectInstruction.new_msg(
+                 ObjectInstruction.ObjectInstructionType.Create,
+                 self._identity,
+                 ObjectMetadata.new_msg(*zip(*object_instructions_to_send)),
+             )
+         )
+
+         for obj_cache in self._pending_objects:
+             self._connector_storage.set_object(obj_cache.object_id, obj_cache.object_payload)
+
+         self._pending_objects.clear()
+
+     def clear(self):
+         """
+         remove all committed and pending objects.
+         """
+
+         self._pending_objects.clear()
+
+         # the Clear instruction does not clear the serializer.
+         self._valid_object_ids.clear()
+         self._valid_object_ids.add(self._serializer_object_id)
+
+         self._connector_agent.send(
+             ObjectInstruction.new_msg(
+                 ObjectInstruction.ObjectInstructionType.Clear, self._identity, ObjectMetadata.new_msg(tuple())
+             )
+         )
+
+     def is_valid_object_id(self, object_id: ObjectID) -> bool:
+         return object_id in self._valid_object_ids
+
+     def __construct_serializer(self) -> ObjectCache:
+         serializer_payload = cloudpickle.dumps(self._serializer, protocol=pickle.HIGHEST_PROTOCOL)
+         object_id = ObjectID.generate_serializer_object_id(self._identity)
+         serializer_cache = ObjectCache(
+             object_id, ObjectMetadata.ObjectContentType.Serializer, b"serializer", serializer_payload
+         )
+
+         return serializer_cache
+
+     def __construct_function(self, fn: Callable) -> ObjectCache:
+         function_payload = self._serializer.serialize(fn)
+         object_id = ObjectID.generate_object_id(self._identity)
+         function_cache = ObjectCache(
+             object_id,
+             ObjectMetadata.ObjectContentType.Object,
+             getattr(fn, "__name__", f"<func {repr(object_id)}>").encode(),
+             function_payload,
+         )
+
+         return function_cache
+
+     def __construct_object(self, obj: Any, name: Optional[str] = None) -> ObjectCache:
+         object_payload = self._serializer.serialize(obj)
+         object_id = ObjectID.generate_object_id(self._identity)
+         name_bytes = name.encode() if name else f"<obj {repr(object_id)}>".encode()
+         object_cache = ObjectCache(object_id, ObjectMetadata.ObjectContentType.Object, name_bytes, object_payload)
+
+         return object_cache
+
+     def __buffer_send_serialized_object(self, object_cache: ObjectCache) -> ObjectCache:
+         if object_cache.object_id not in self._valid_object_ids:
+             self._pending_objects.append(object_cache)
+             self._valid_object_ids.add(object_cache.object_id)
+
+         return object_cache
+
+     def __send_serializer(self) -> ObjectID:
+         serialized_serializer = self.__construct_serializer()
+         self.__buffer_send_serialized_object(serialized_serializer)
+         self.commit_send_objects()
+
+         return serialized_serializer.object_id
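
ObjectBuffer stages serialized objects locally, de-duplicates them by ObjectID, and commits in two steps: one ObjectInstruction.Create message with the metadata over the agent connector, then one set_object() upload per payload to object storage. The connectors are internal to the package, so the stand-alone sketch below only mirrors that stage/dedupe/commit shape in plain Python; none of its names are scaler API.

    # Pattern illustration only (not scaler API): stage once per id, then flush
    # metadata first and payloads second, the way commit_send_objects() does.
    from typing import Dict, List, Set, Tuple

    class StageThenCommit:
        def __init__(self) -> None:
            self._pending: List[Tuple[str, bytes]] = []
            self._known_ids: Set[str] = set()
            self._store: Dict[str, bytes] = {}

        def buffer(self, object_id: str, payload: bytes) -> None:
            # mirrors __buffer_send_serialized_object: skip ids already staged or committed
            if object_id in self._known_ids:
                return
            self._known_ids.add(object_id)
            self._pending.append((object_id, payload))

        def commit(self) -> None:
            # mirrors commit_send_objects: announce all metadata in one batch ...
            if not self._pending:
                return
            print("create:", [object_id for object_id, _ in self._pending])
            # ... then upload each payload (stand-in for connector_storage.set_object)
            for object_id, payload in self._pending:
                self._store[object_id] = payload
            self._pending.clear()

    buffer = StageThenCommit()
    buffer.buffer("obj-1", b"payload")
    buffer.buffer("obj-1", b"payload")  # ignored: already staged
    buffer.commit()                     # prints: create: ['obj-1']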
scaler/client/object_reference.py
@@ -0,0 +1,25 @@
+ import dataclasses
+
+ from scaler.utility.identifiers import ObjectID
+
+
+ @dataclasses.dataclass
+ class ObjectReference:
+     name: bytes
+     size: int
+     object_id: ObjectID
+
+     def __repr__(self):
+         return f"ObjectReference(name={self.name!r}, size={self.size} bytes, id={self.object_id!r})"
+
+     def __hash__(self):
+         return hash(self.object_id)
+
+     def __eq__(self, other: object) -> bool:
+         if not isinstance(other, ObjectReference):
+             return NotImplemented
+
+         return self.object_id == other.object_id
+
+     def __ne__(self, other):
+         return not self.__eq__(other)
scaler/client/serializer/__init__.py
File without changes
scaler/client/serializer/default.py
@@ -0,0 +1,16 @@
+ import pickle
+ from typing import Any
+
+ import cloudpickle
+
+ from scaler.client.serializer.mixins import Serializer
+
+
+ class DefaultSerializer(Serializer):
+     @staticmethod
+     def serialize(obj: Any) -> bytes:
+         return cloudpickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
+
+     @staticmethod
+     def deserialize(payload: bytes) -> Any:
+         return cloudpickle.loads(payload)
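
DefaultSerializer is a thin wrapper around cloudpickle, so functions as well as plain data round-trip through it. A small sanity-check sketch, assuming the package (and its cloudpickle dependency) is installed:

    from scaler.client.serializer.default import DefaultSerializer

    def square(x: int) -> int:
        return x * x

    # Functions survive the round trip because cloudpickle serializes code objects.
    payload = DefaultSerializer.serialize(square)
    restored = DefaultSerializer.deserialize(payload)
    assert restored(7) == 49

    # Plain data round-trips the same way.
    assert DefaultSerializer.deserialize(DefaultSerializer.serialize({"a": 1})) == {"a": 1}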
scaler/client/serializer/mixins.py
@@ -0,0 +1,38 @@
+ import abc
+ from typing import Any
+
+
+ class Serializer(metaclass=abc.ABCMeta):
+     @staticmethod
+     @abc.abstractmethod
+     def serialize(obj: Any) -> bytes:
+         """
+         Serialize an object to bytes. This method is called for the function object, EACH argument object, and the
+         function result object. For example:
+
+         def add(a, b):
+             return a + b
+
+         client.submit(add, 1, 2)
+
+         The add function and the arguments 1 and 2 will each be serialized and sent to the worker; the result of a + b
+         will be serialized and sent back to the client, which uses the deserialize method below to restore it.
+
+         :param obj: the object to be serialized, can be function object, argument object, or function result object
+         :return: serialized bytes of the object
+         """
+
+         raise NotImplementedError()
+
+     @staticmethod
+     @abc.abstractmethod
+     def deserialize(payload: bytes) -> Any:
+         """
+         Deserialize bytes back into the original object. This method is used to deserialize the function object bytes
+         and EACH serialized argument and serialized function result.
+
+         :param payload: the serialized bytes of the object, can be function object, argument object, or function result
+             object
+         :return: any deserialized object
+         """
+         raise NotImplementedError()
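
Because the abstract methods are static and symmetric, a custom serializer only needs to round-trip whatever its own serialize() produces. Below is a hedged sketch of one possible implementation, cloudpickle output compressed with zlib; wiring it into the client (client.py appears to accept a serializer) is an assumption, not something shown in this hunk.

    # Sketch of a custom Serializer: cloudpickle plus zlib compression. Whether the
    # extra CPU cost pays off depends entirely on payload sizes; illustrative only.
    import pickle
    import zlib
    from typing import Any

    import cloudpickle

    from scaler.client.serializer.mixins import Serializer

    class CompressedSerializer(Serializer):
        @staticmethod
        def serialize(obj: Any) -> bytes:
            # called for the function object, each argument, and the result
            return zlib.compress(cloudpickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL))

        @staticmethod
        def deserialize(payload: bytes) -> Any:
            return cloudpickle.loads(zlib.decompress(payload))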
scaler/cluster/__init__.py
File without changes
scaler/cluster/cluster.py
@@ -0,0 +1,115 @@
+ import logging
+ import multiprocessing
+ import os
+ import signal
+ from typing import Dict, List, Optional, Tuple
+
+ from scaler.config.types.object_storage_server import ObjectStorageConfig
+ from scaler.config.types.zmq import ZMQConfig
+ from scaler.utility.logging.utility import setup_logger
+ from scaler.worker.worker import Worker
+
+
+ class Cluster(multiprocessing.get_context("spawn").Process):  # type: ignore[misc]
+
+     def __init__(
+         self,
+         address: ZMQConfig,
+         object_storage_address: Optional[ObjectStorageConfig],
+         preload: Optional[str],
+         worker_io_threads: int,
+         worker_names: List[str],
+         per_worker_capabilities: Dict[str, int],
+         per_worker_task_queue_size: int,
+         heartbeat_interval_seconds: int,
+         task_timeout_seconds: int,
+         death_timeout_seconds: int,
+         garbage_collect_interval_seconds: int,
+         trim_memory_threshold_bytes: int,
+         hard_processor_suspend: bool,
+         event_loop: str,
+         logging_paths: Tuple[str, ...],
+         logging_config_file: Optional[str],
+         logging_level: str,
+     ):
+         multiprocessing.Process.__init__(self, name="WorkerMaster")
+
+         self._address = address
+         self._object_storage_address = object_storage_address
+         self._preload = preload
+         self._worker_io_threads = worker_io_threads
+         self._worker_names = worker_names
+         self._per_worker_capabilities = per_worker_capabilities
+
+         self._per_worker_task_queue_size = per_worker_task_queue_size
+         self._heartbeat_interval_seconds = heartbeat_interval_seconds
+         self._task_timeout_seconds = task_timeout_seconds
+         self._death_timeout_seconds = death_timeout_seconds
+         self._garbage_collect_interval_seconds = garbage_collect_interval_seconds
+         self._trim_memory_threshold_bytes = trim_memory_threshold_bytes
+         self._hard_processor_suspend = hard_processor_suspend
+         self._event_loop = event_loop
+
+         self._logging_paths = logging_paths
+         self._logging_config_file = logging_config_file
+         self._logging_level = logging_level
+
+         self._workers: List[Worker] = []
+
+     def run(self):
+         setup_logger(self._logging_paths, self._logging_config_file, self._logging_level)
+         self.__register_signal()
+         self.__start_workers_and_run_forever()
+
+     def __destroy(self, *args):
+         assert args is not None
+         logging.info(f"{self.__get_prefix()} received signal, shutting down")
+         for worker in self._workers:
+             logging.info(f"{self.__get_prefix()} shutting down {worker.identity!r}")
+             os.kill(worker.pid, signal.SIGINT)
+
+     def __register_signal(self):
+         signal.signal(signal.SIGINT, self.__destroy)
+         signal.signal(signal.SIGTERM, self.__destroy)
+
+     def __start_workers_and_run_forever(self):
+         logging.info(
+             f"{self.__get_prefix()} starting {len(self._worker_names)} workers, heartbeat_interval_seconds="
+             f"{self._heartbeat_interval_seconds}, task_timeout_seconds={self._task_timeout_seconds}"
+         )
+
+         self._workers = [
+             Worker(
+                 event_loop=self._event_loop,
+                 name=name,
+                 address=self._address,
+                 object_storage_address=self._object_storage_address,
+                 capabilities=self._per_worker_capabilities,
+                 preload=self._preload,
+                 io_threads=self._worker_io_threads,
+                 task_queue_size=self._per_worker_task_queue_size,
+                 heartbeat_interval_seconds=self._heartbeat_interval_seconds,
+                 garbage_collect_interval_seconds=self._garbage_collect_interval_seconds,
+                 trim_memory_threshold_bytes=self._trim_memory_threshold_bytes,
+                 task_timeout_seconds=self._task_timeout_seconds,
+                 death_timeout_seconds=self._death_timeout_seconds,
+                 hard_processor_suspend=self._hard_processor_suspend,
+                 logging_paths=self._logging_paths,
+                 logging_level=self._logging_level,
+             )
+             for name in self._worker_names
+         ]
+
+         for worker in self._workers:
+             worker.start()
+
+         for worker in self._workers:
+             logging.info(f"{worker.identity!r} started")
+
+         for worker in self._workers:
+             worker.join()
+
+         logging.info(f"{self.__get_prefix()} shutdown")
+
+     def __get_prefix(self):
+         return f"{self.__class__.__name__}:"