opengris-scaler 1.12.7__cp311-cp311-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of opengris-scaler might be problematic. Click here for more details.

Files changed (234) hide show
  1. opengris_scaler-1.12.7.dist-info/METADATA +729 -0
  2. opengris_scaler-1.12.7.dist-info/RECORD +234 -0
  3. opengris_scaler-1.12.7.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.7.dist-info/entry_points.txt +9 -0
  5. opengris_scaler-1.12.7.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.7.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.7.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-61c06778.1.0.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-21b63b70.1.0.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/CMakeLists.txt +11 -0
  13. scaler/__init__.py +14 -0
  14. scaler/about.py +5 -0
  15. scaler/client/__init__.py +0 -0
  16. scaler/client/agent/__init__.py +0 -0
  17. scaler/client/agent/client_agent.py +210 -0
  18. scaler/client/agent/disconnect_manager.py +27 -0
  19. scaler/client/agent/future_manager.py +112 -0
  20. scaler/client/agent/heartbeat_manager.py +74 -0
  21. scaler/client/agent/mixins.py +89 -0
  22. scaler/client/agent/object_manager.py +98 -0
  23. scaler/client/agent/task_manager.py +64 -0
  24. scaler/client/client.py +635 -0
  25. scaler/client/future.py +252 -0
  26. scaler/client/object_buffer.py +129 -0
  27. scaler/client/object_reference.py +25 -0
  28. scaler/client/serializer/__init__.py +0 -0
  29. scaler/client/serializer/default.py +16 -0
  30. scaler/client/serializer/mixins.py +38 -0
  31. scaler/cluster/__init__.py +0 -0
  32. scaler/cluster/cluster.py +115 -0
  33. scaler/cluster/combo.py +148 -0
  34. scaler/cluster/object_storage_server.py +45 -0
  35. scaler/cluster/scheduler.py +83 -0
  36. scaler/config/__init__.py +0 -0
  37. scaler/config/defaults.py +87 -0
  38. scaler/config/loader.py +95 -0
  39. scaler/config/mixins.py +15 -0
  40. scaler/config/section/__init__.py +0 -0
  41. scaler/config/section/cluster.py +56 -0
  42. scaler/config/section/native_worker_adapter.py +44 -0
  43. scaler/config/section/object_storage_server.py +7 -0
  44. scaler/config/section/scheduler.py +53 -0
  45. scaler/config/section/symphony_worker_adapter.py +47 -0
  46. scaler/config/section/top.py +13 -0
  47. scaler/config/section/webui.py +16 -0
  48. scaler/config/types/__init__.py +0 -0
  49. scaler/config/types/object_storage_server.py +45 -0
  50. scaler/config/types/worker.py +57 -0
  51. scaler/config/types/zmq.py +79 -0
  52. scaler/entry_points/__init__.py +0 -0
  53. scaler/entry_points/cluster.py +133 -0
  54. scaler/entry_points/object_storage_server.py +41 -0
  55. scaler/entry_points/scheduler.py +135 -0
  56. scaler/entry_points/top.py +286 -0
  57. scaler/entry_points/webui.py +26 -0
  58. scaler/entry_points/worker_adapter_native.py +137 -0
  59. scaler/entry_points/worker_adapter_symphony.py +102 -0
  60. scaler/io/__init__.py +0 -0
  61. scaler/io/async_binder.py +85 -0
  62. scaler/io/async_connector.py +95 -0
  63. scaler/io/async_object_storage_connector.py +185 -0
  64. scaler/io/mixins.py +154 -0
  65. scaler/io/sync_connector.py +68 -0
  66. scaler/io/sync_object_storage_connector.py +185 -0
  67. scaler/io/sync_subscriber.py +83 -0
  68. scaler/io/utility.py +31 -0
  69. scaler/io/ymq/CMakeLists.txt +98 -0
  70. scaler/io/ymq/__init__.py +0 -0
  71. scaler/io/ymq/_ymq.pyi +96 -0
  72. scaler/io/ymq/_ymq.so +0 -0
  73. scaler/io/ymq/bytes.h +114 -0
  74. scaler/io/ymq/common.h +29 -0
  75. scaler/io/ymq/configuration.h +60 -0
  76. scaler/io/ymq/epoll_context.cpp +185 -0
  77. scaler/io/ymq/epoll_context.h +85 -0
  78. scaler/io/ymq/error.h +132 -0
  79. scaler/io/ymq/event_loop.h +55 -0
  80. scaler/io/ymq/event_loop_thread.cpp +64 -0
  81. scaler/io/ymq/event_loop_thread.h +46 -0
  82. scaler/io/ymq/event_manager.h +81 -0
  83. scaler/io/ymq/file_descriptor.h +203 -0
  84. scaler/io/ymq/interruptive_concurrent_queue.h +169 -0
  85. scaler/io/ymq/io_context.cpp +98 -0
  86. scaler/io/ymq/io_context.h +44 -0
  87. scaler/io/ymq/io_socket.cpp +299 -0
  88. scaler/io/ymq/io_socket.h +121 -0
  89. scaler/io/ymq/iocp_context.cpp +102 -0
  90. scaler/io/ymq/iocp_context.h +83 -0
  91. scaler/io/ymq/logging.h +163 -0
  92. scaler/io/ymq/message.h +15 -0
  93. scaler/io/ymq/message_connection.h +16 -0
  94. scaler/io/ymq/message_connection_tcp.cpp +672 -0
  95. scaler/io/ymq/message_connection_tcp.h +96 -0
  96. scaler/io/ymq/network_utils.h +179 -0
  97. scaler/io/ymq/pymod_ymq/bytes.h +113 -0
  98. scaler/io/ymq/pymod_ymq/exception.h +124 -0
  99. scaler/io/ymq/pymod_ymq/gil.h +15 -0
  100. scaler/io/ymq/pymod_ymq/io_context.h +166 -0
  101. scaler/io/ymq/pymod_ymq/io_socket.h +285 -0
  102. scaler/io/ymq/pymod_ymq/message.h +99 -0
  103. scaler/io/ymq/pymod_ymq/python.h +153 -0
  104. scaler/io/ymq/pymod_ymq/ymq.cpp +23 -0
  105. scaler/io/ymq/pymod_ymq/ymq.h +357 -0
  106. scaler/io/ymq/readme.md +114 -0
  107. scaler/io/ymq/simple_interface.cpp +80 -0
  108. scaler/io/ymq/simple_interface.h +24 -0
  109. scaler/io/ymq/tcp_client.cpp +367 -0
  110. scaler/io/ymq/tcp_client.h +75 -0
  111. scaler/io/ymq/tcp_operations.h +41 -0
  112. scaler/io/ymq/tcp_server.cpp +410 -0
  113. scaler/io/ymq/tcp_server.h +79 -0
  114. scaler/io/ymq/third_party/concurrentqueue.h +3747 -0
  115. scaler/io/ymq/timed_queue.h +272 -0
  116. scaler/io/ymq/timestamp.h +102 -0
  117. scaler/io/ymq/typedefs.h +20 -0
  118. scaler/io/ymq/utils.h +34 -0
  119. scaler/io/ymq/ymq.py +130 -0
  120. scaler/object_storage/CMakeLists.txt +50 -0
  121. scaler/object_storage/__init__.py +0 -0
  122. scaler/object_storage/constants.h +11 -0
  123. scaler/object_storage/defs.h +14 -0
  124. scaler/object_storage/io_helper.cpp +44 -0
  125. scaler/object_storage/io_helper.h +9 -0
  126. scaler/object_storage/message.cpp +56 -0
  127. scaler/object_storage/message.h +130 -0
  128. scaler/object_storage/object_manager.cpp +126 -0
  129. scaler/object_storage/object_manager.h +52 -0
  130. scaler/object_storage/object_storage_server.cpp +359 -0
  131. scaler/object_storage/object_storage_server.h +126 -0
  132. scaler/object_storage/object_storage_server.so +0 -0
  133. scaler/object_storage/pymod_object_storage_server.cpp +104 -0
  134. scaler/protocol/__init__.py +0 -0
  135. scaler/protocol/capnp/__init__.py +0 -0
  136. scaler/protocol/capnp/_python.py +6 -0
  137. scaler/protocol/capnp/common.capnp +63 -0
  138. scaler/protocol/capnp/message.capnp +216 -0
  139. scaler/protocol/capnp/object_storage.capnp +52 -0
  140. scaler/protocol/capnp/status.capnp +73 -0
  141. scaler/protocol/introduction.md +105 -0
  142. scaler/protocol/python/__init__.py +0 -0
  143. scaler/protocol/python/common.py +135 -0
  144. scaler/protocol/python/message.py +726 -0
  145. scaler/protocol/python/mixins.py +13 -0
  146. scaler/protocol/python/object_storage.py +118 -0
  147. scaler/protocol/python/status.py +279 -0
  148. scaler/protocol/worker.md +228 -0
  149. scaler/scheduler/__init__.py +0 -0
  150. scaler/scheduler/allocate_policy/__init__.py +0 -0
  151. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  152. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  153. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  154. scaler/scheduler/allocate_policy/mixins.py +55 -0
  155. scaler/scheduler/controllers/__init__.py +0 -0
  156. scaler/scheduler/controllers/balance_controller.py +65 -0
  157. scaler/scheduler/controllers/client_controller.py +131 -0
  158. scaler/scheduler/controllers/config_controller.py +31 -0
  159. scaler/scheduler/controllers/graph_controller.py +424 -0
  160. scaler/scheduler/controllers/information_controller.py +81 -0
  161. scaler/scheduler/controllers/mixins.py +201 -0
  162. scaler/scheduler/controllers/object_controller.py +147 -0
  163. scaler/scheduler/controllers/scaling_controller.py +86 -0
  164. scaler/scheduler/controllers/task_controller.py +373 -0
  165. scaler/scheduler/controllers/worker_controller.py +168 -0
  166. scaler/scheduler/object_usage/__init__.py +0 -0
  167. scaler/scheduler/object_usage/object_tracker.py +131 -0
  168. scaler/scheduler/scheduler.py +253 -0
  169. scaler/scheduler/task/__init__.py +0 -0
  170. scaler/scheduler/task/task_state_machine.py +92 -0
  171. scaler/scheduler/task/task_state_manager.py +61 -0
  172. scaler/ui/__init__.py +0 -0
  173. scaler/ui/constants.py +9 -0
  174. scaler/ui/live_display.py +118 -0
  175. scaler/ui/memory_window.py +146 -0
  176. scaler/ui/setting_page.py +47 -0
  177. scaler/ui/task_graph.py +370 -0
  178. scaler/ui/task_log.py +83 -0
  179. scaler/ui/utility.py +35 -0
  180. scaler/ui/webui.py +125 -0
  181. scaler/ui/worker_processors.py +85 -0
  182. scaler/utility/__init__.py +0 -0
  183. scaler/utility/debug.py +19 -0
  184. scaler/utility/event_list.py +63 -0
  185. scaler/utility/event_loop.py +58 -0
  186. scaler/utility/exceptions.py +42 -0
  187. scaler/utility/formatter.py +44 -0
  188. scaler/utility/graph/__init__.py +0 -0
  189. scaler/utility/graph/optimization.py +27 -0
  190. scaler/utility/graph/topological_sorter.py +11 -0
  191. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  192. scaler/utility/identifiers.py +105 -0
  193. scaler/utility/logging/__init__.py +0 -0
  194. scaler/utility/logging/decorators.py +25 -0
  195. scaler/utility/logging/scoped_logger.py +33 -0
  196. scaler/utility/logging/utility.py +183 -0
  197. scaler/utility/many_to_many_dict.py +123 -0
  198. scaler/utility/metadata/__init__.py +0 -0
  199. scaler/utility/metadata/profile_result.py +31 -0
  200. scaler/utility/metadata/task_flags.py +30 -0
  201. scaler/utility/mixins.py +13 -0
  202. scaler/utility/network_util.py +7 -0
  203. scaler/utility/one_to_many_dict.py +72 -0
  204. scaler/utility/queues/__init__.py +0 -0
  205. scaler/utility/queues/async_indexed_queue.py +37 -0
  206. scaler/utility/queues/async_priority_queue.py +70 -0
  207. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  208. scaler/utility/queues/indexed_queue.py +114 -0
  209. scaler/utility/serialization.py +9 -0
  210. scaler/version.txt +1 -0
  211. scaler/worker/__init__.py +0 -0
  212. scaler/worker/agent/__init__.py +0 -0
  213. scaler/worker/agent/heartbeat_manager.py +107 -0
  214. scaler/worker/agent/mixins.py +137 -0
  215. scaler/worker/agent/processor/__init__.py +0 -0
  216. scaler/worker/agent/processor/object_cache.py +107 -0
  217. scaler/worker/agent/processor/processor.py +279 -0
  218. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  219. scaler/worker/agent/processor_holder.py +145 -0
  220. scaler/worker/agent/processor_manager.py +365 -0
  221. scaler/worker/agent/profiling_manager.py +109 -0
  222. scaler/worker/agent/task_manager.py +150 -0
  223. scaler/worker/agent/timeout_manager.py +19 -0
  224. scaler/worker/preload.py +84 -0
  225. scaler/worker/worker.py +264 -0
  226. scaler/worker_adapter/__init__.py +0 -0
  227. scaler/worker_adapter/native.py +154 -0
  228. scaler/worker_adapter/symphony/__init__.py +0 -0
  229. scaler/worker_adapter/symphony/callback.py +45 -0
  230. scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
  231. scaler/worker_adapter/symphony/message.py +24 -0
  232. scaler/worker_adapter/symphony/task_manager.py +288 -0
  233. scaler/worker_adapter/symphony/worker.py +205 -0
  234. scaler/worker_adapter/symphony/worker_adapter.py +142 -0
@@ -0,0 +1,279 @@
1
+ import contextlib
2
+ import logging
3
+ import multiprocessing
4
+ import os
5
+ import signal
6
+ from contextlib import redirect_stderr, redirect_stdout
7
+ from contextvars import ContextVar, Token
8
+ from multiprocessing.synchronize import Event as EventType
9
+ from typing import IO, Callable, List, Optional, Tuple, cast
10
+
11
+ import tblib.pickling_support
12
+ import zmq
13
+
14
+ from scaler.config.types.object_storage_server import ObjectStorageConfig
15
+ from scaler.config.types.zmq import ZMQConfig
16
+ from scaler.io.mixins import SyncConnector, SyncObjectStorageConnector
17
+ from scaler.io.sync_connector import ZMQSyncConnector
18
+ from scaler.io.sync_object_storage_connector import PySyncObjectStorageConnector
19
+ from scaler.protocol.python.common import ObjectMetadata, TaskResultType
20
+ from scaler.protocol.python.message import ObjectInstruction, ProcessorInitialized, Task, TaskLog, TaskResult
21
+ from scaler.protocol.python.mixins import Message
22
+ from scaler.utility.identifiers import ClientID, ObjectID, TaskID
23
+ from scaler.utility.logging.utility import setup_logger
24
+ from scaler.utility.metadata.task_flags import retrieve_task_flags_from_task
25
+ from scaler.utility.serialization import serialize_failure
26
+ from scaler.worker.agent.processor.object_cache import ObjectCache
27
+ from scaler.worker.agent.processor.streaming_buffer import StreamingBuffer
28
+ from scaler.worker.preload import execute_preload
29
+
30
# Name of the signal that asks a processor to pause. It is kept as a string instead of a `signal.Signals` member so
# that importing this module does not fail on systems where that signal is not defined.
SUSPEND_SIGNAL = "SIGUSR1"

# Processor that is currently running a task in this context; None when no task is being executed.
_current_processor: ContextVar[Optional["Processor"]] = ContextVar("_current_processor", default=None)
33
+
34
+
35
class Processor(multiprocessing.get_context("spawn").Process):  # type: ignore
    """Child process that receives tasks from the worker agent, executes them and reports their results.

    The constructor only stores configuration. Connections, the object cache and the signal handlers are created
    by `run()`, i.e. inside the spawned process.
    """

    def __init__(
        self,
        event_loop: str,
        agent_address: ZMQConfig,
        storage_address: ObjectStorageConfig,
        preload: Optional[str],
        resume_event: Optional[EventType],
        resumed_event: Optional[EventType],
        garbage_collect_interval_seconds: int,
        trim_memory_threshold_bytes: int,
        logging_paths: Tuple[str, ...],
        logging_level: str,
    ):
        """Stores the processor configuration.

        :param event_loop: stored as-is; it is not read by any method of this class.
        :param agent_address: address of the agent the processor connects to with a DEALER socket.
        :param storage_address: host and port of the object storage server.
        :param preload: optional preload specification handed to `execute_preload()` during initialization.
        :param resume_event: event the processor waits on while suspended; when None, no suspend signal handler is
            registered.
        :param resumed_event: event the processor sets once it stopped waiting on `resume_event`.
        :param garbage_collect_interval_seconds: forwarded to the `ObjectCache`.
        :param trim_memory_threshold_bytes: forwarded to the `ObjectCache`.
        :param logging_paths: log destinations; every path except "/dev/stdout" gets the process id appended.
        :param logging_level: logging level forwarded to `setup_logger()`.
        """
        # The base class is the spawn context's process class, not `multiprocessing.Process`; `super()` resolves to
        # the class this one actually derives from.
        super().__init__(name="Processor")

        self._event_loop = event_loop
        self._agent_address = agent_address
        self._storage_address = storage_address
        self._preload = preload

        self._resume_event = resume_event
        self._resumed_event = resumed_event

        self._garbage_collect_interval_seconds = garbage_collect_interval_seconds
        self._trim_memory_threshold_bytes = trim_memory_threshold_bytes
        self._logging_paths = logging_paths
        self._logging_level = logging_level

        self._object_cache: Optional[ObjectCache] = None

        self._current_task: Optional[Task] = None

    def run(self) -> None:
        """Process entry point: initializes the processor, then handles messages until interrupted."""
        self.__initialize()
        self.__run_forever()

    @staticmethod
    def get_current_processor() -> Optional["Processor"]:
        """Returns the current Processor instance controlling the current process, if any."""
        return _current_processor.get()

    def current_task(self) -> Optional[Task]:
        """Returns the task received last whose result has not been sent yet, or None."""
        return self._current_task

    def __initialize(self):
        """Sets up logging, both connectors, the object cache and signal handlers, then runs the preload hook.

        :raises RuntimeError: when the preload hook raises; the original exception is chained as the cause.
        """
        # modify the logging path and add process id to the path
        logging_paths = [f"{path}-{os.getpid()}" for path in self._logging_paths if path != "/dev/stdout"]
        if "/dev/stdout" in self._logging_paths:
            logging_paths.append("/dev/stdout")

        setup_logger(log_paths=tuple(logging_paths), logging_level=self._logging_level)
        tblib.pickling_support.install()

        self._connector_agent: SyncConnector = ZMQSyncConnector(
            context=zmq.Context(), socket_type=zmq.DEALER, address=self._agent_address, identity=None
        )
        self._connector_storage: SyncObjectStorageConnector = PySyncObjectStorageConnector(
            self._storage_address.host, self._storage_address.port
        )

        self._object_cache = ObjectCache(
            garbage_collect_interval_seconds=self._garbage_collect_interval_seconds,
            trim_memory_threshold_bytes=self._trim_memory_threshold_bytes,
        )
        self._object_cache.start()

        self.__register_signals()

        # Execute optional preload hook if provided
        if self._preload is not None:
            try:
                execute_preload(self._preload)
            except Exception as e:
                raise RuntimeError(
                    f"Processor[{self.pid}] initialization failed due to preload error: {self._preload}"
                ) from e

    def __register_signals(self):
        """Installs the SIGTERM handler and, when soft suspension is enabled, the suspend signal handler."""
        self.__register_signal("SIGTERM", self.__interrupt)

        if self._resume_event is not None:
            self.__register_signal(SUSPEND_SIGNAL, self.__suspend)

    def __interrupt(self, *args):
        """SIGTERM handler."""
        self._connector_agent.destroy()  # interrupts any blocking socket.

    def __suspend(self, *args):
        """Suspend signal handler: blocks until `_resume_event` is set, then acknowledges through `_resumed_event`."""
        assert self._resume_event is not None
        assert self._resumed_event is not None

        self._resume_event.wait()  # stops any computation in the main thread until the event is triggered

        # Ensures the processor agent knows we stopped waiting on `_resume_event`, as to avoid re-entrant wait on the
        # event.
        self._resumed_event.set()

    def __run_forever(self):
        """Announces the processor to the agent, then dispatches every received message until the loop is broken.

        A `ZMQError` with errno `ENOTSOCK`, `KeyboardInterrupt` and `InterruptedError` end the loop silently. Any
        other `ZMQError` is re-raised and any other exception is logged. The object cache and the agent connector
        are released in every case.
        """
        try:
            self._connector_agent.send(ProcessorInitialized.new_msg())
            while True:
                message = self._connector_agent.receive()
                if message is None:
                    continue

                self.__on_connector_receive(message)

        except zmq.error.ZMQError as e:
            if e.errno != zmq.ENOTSOCK:  # ignore if socket got closed
                raise

        except (KeyboardInterrupt, InterruptedError):
            pass

        except Exception as e:
            logging.exception(f"Processor[{self.pid}]: failed with unhandled exception:\n{e}")

        finally:
            self._object_cache.destroy()
            self._connector_agent.destroy()

            self._object_cache.join()

    def __on_connector_receive(self, message: Message):
        """Routes a message from the agent to its handler; unsupported message types are only logged."""
        if isinstance(message, ObjectInstruction):
            self.__on_receive_object_instruction(message)
            return

        if isinstance(message, Task):
            self.__on_received_task(message)
            return

        logging.error(f"unknown {message=}")

    def __on_receive_object_instruction(self, instruction: ObjectInstruction):
        """Handles `Delete` instructions by dropping the listed objects from the cache; other types are logged."""
        if instruction.instruction_type == ObjectInstruction.ObjectInstructionType.Delete:
            for object_id in instruction.object_metadata.object_ids:
                self._object_cache.del_object(object_id)
            return

        logging.error(f"worker received unknown object instruction type {instruction=}")

    def __on_received_task(self, task: Task):
        """Marks `task` as the current task, fetches the objects it needs, then executes it."""
        self._current_task = task

        self.__cache_required_object_ids(task)

        self.__process_task(task)

    def __cache_required_object_ids(self, task: Task) -> None:
        """Downloads from the object storage every object required by `task` that is not cached yet."""
        required_object_ids = self.__get_required_object_ids_for_task(task)

        for object_id in required_object_ids:
            if self._object_cache.has_object(object_id):
                continue

            object_content = self._connector_storage.get_object(object_id)
            self._object_cache.add_object(task.source, object_id, object_content)

    @staticmethod
    def __get_required_object_ids_for_task(task: Task) -> List[ObjectID]:
        """Returns the serializer object id of the task's source, the function object id and all argument ids."""
        serializer_id = ObjectID.generate_serializer_object_id(task.source)
        object_ids = [
            serializer_id,
            task.func_object_id,
            *(cast(ObjectID, argument) for argument in task.function_args),
        ]
        return object_ids

    def __process_task(self, task: Task):
        """Runs the task's function on its cached arguments and sends back the result or the failure.

        When the task flags request output streaming, stdout and stderr are redirected to `StreamingBuffer`
        instances for the duration of the call. An `Exception` raised while loading, running or serializing is
        logged and reported as a `Failed` result.
        """
        task_flags = retrieve_task_flags_from_task(task)

        try:
            function = self._object_cache.get_object(task.func_object_id)

            args = [self._object_cache.get_object(cast(ObjectID, arg)) for arg in task.function_args]

            # Context managers are entered in the listed order and exited in reverse order when the stack closes.
            with contextlib.ExitStack() as stack:
                if task_flags.stream_output:
                    stdout_buf = stack.enter_context(
                        StreamingBuffer(task.task_id, TaskLog.LogType.Stdout, self._connector_agent)
                    )
                    stderr_buf = stack.enter_context(
                        StreamingBuffer(task.task_id, TaskLog.LogType.Stderr, self._connector_agent)
                    )
                    stack.enter_context(self.__processor_context())
                    stack.enter_context(redirect_stdout(cast(IO[str], stdout_buf)))
                    stack.enter_context(redirect_stderr(cast(IO[str], stderr_buf)))
                else:
                    stack.enter_context(self.__processor_context())

                result = function(*args)

            result_bytes = self._object_cache.serialize(task.source, result)
            task_result_type = TaskResultType.Success

        except Exception as e:
            logging.exception(f"exception when processing task_id={task.task_id.hex()}:")
            task_result_type = TaskResultType.Failed
            result_bytes = serialize_failure(e)

        self.__send_result(task.source, task.task_id, task_result_type, result_bytes)

    def __send_result(self, source: ClientID, task_id: TaskID, task_result_type: TaskResultType, result_bytes: bytes):
        """Stores the result bytes in the object storage, then notifies the agent.

        The agent first receives an `ObjectInstruction` announcing the new result object, then the `TaskResult`
        referencing it. The current task is cleared before anything is sent.
        """
        self._current_task = None

        result_object_id = ObjectID.generate_object_id(source)

        self._connector_storage.set_object(result_object_id, result_bytes)
        self._connector_agent.send(
            ObjectInstruction.new_msg(
                ObjectInstruction.ObjectInstructionType.Create,
                source,
                ObjectMetadata.new_msg(
                    (result_object_id,),
                    (ObjectMetadata.ObjectContentType.Object,),
                    (f"<res {repr(result_object_id)}>".encode(),),
                ),
            )
        )
        self._connector_agent.send(
            TaskResult.new_msg(task_id, task_result_type, metadata=b"", results=[bytes(result_object_id)])
        )

    @staticmethod
    def __set_current_processor(context: Optional["Processor"]) -> Token:
        """Sets the current processor context variable and returns the resulting token.

        :raises ValueError: when trying to set a processor while another one is already set.
        """
        if context is not None and _current_processor.get() is not None:
            raise ValueError("cannot override a previously set processor context.")

        return _current_processor.set(context)

    @contextlib.contextmanager
    def __processor_context(self):
        """Context manager exposing this processor through `get_current_processor()` while the body runs."""
        self.__set_current_processor(self)
        try:
            yield
        finally:
            self.__set_current_processor(None)

    @staticmethod
    def __register_signal(signal_name: str, handler: Callable) -> None:
        """Installs `handler` for the signal named `signal_name`.

        :raises RuntimeError: when the `signal` module does not define `signal_name` on this platform.
        """
        signal_instance = getattr(signal, signal_name, None)
        if signal_instance is None:
            raise RuntimeError(f"unsupported platform, signal not available: {signal_name}.")

        signal.signal(signal_instance, handler)
@@ -0,0 +1,28 @@
1
+ import io
2
+ import logging
3
+
4
+ from scaler.io.mixins import SyncConnector
5
+ from scaler.protocol.python.message import TaskLog
6
+ from scaler.utility.identifiers import TaskID
7
+
8
+
9
class StreamingBuffer(io.TextIOBase):
    """A custom IO buffer that sends content as it's written.

    Every non-empty `write()` is forwarded to the agent connector as one `TaskLog` message. Nothing is buffered
    locally.
    """

    def __init__(self, task_id: TaskID, log_type: TaskLog.LogType, connector_agent: SyncConnector):
        """
        :param task_id: id of the task the written content belongs to.
        :param log_type: log type attached to every message sent by this buffer.
        :param connector_agent: connector used to send the `TaskLog` messages.
        """
        super().__init__()
        self._task_id = task_id
        self._log_type = log_type
        self._connector_agent = connector_agent

    def write(self, content: str) -> int:
        """Forwards `content` to the agent and returns the number of characters written.

        Sending is best-effort: a failure is logged as a warning instead of being raised. Returns `len(content)`
        when the content was sent, and 0 when the stream is closed, `content` is empty, or sending failed.
        """
        if self.closed or not content:
            return 0

        try:
            self._connector_agent.send(TaskLog.new_msg(self._task_id, self._log_type, content))
        except Exception as e:
            logging.warning(f"Failed to send stream content: {e}")
            return 0

        # `io.TextIOBase.write()` must report how many characters were written.
        return len(content)
@@ -0,0 +1,145 @@
1
+ import logging
2
+ import multiprocessing
3
+ import os
4
+ import signal
5
+ from typing import Optional, Tuple
6
+
7
+ import psutil
8
+
9
+ from scaler.config.defaults import DEFAULT_PROCESSOR_KILL_DELAY_SECONDS
10
+ from scaler.protocol.python.message import Task
11
+ from scaler.utility.identifiers import ProcessorID
12
+ from scaler.config.types.object_storage_server import ObjectStorageConfig
13
+ from scaler.config.types.zmq import ZMQConfig
14
+ from scaler.worker.agent.processor.processor import SUSPEND_SIGNAL, Processor
15
+
16
+
17
class ProcessorHolder:
    """Starts one `Processor` process and tracks its id, current task and suspension state.

    Suspension comes in two flavours, selected by `hard_suspend`:

    - hard: the process is stopped and continued with SIGSTOP / SIGCONT;
    - soft: the process receives `SUSPEND_SIGNAL` and waits on a shared event until it is resumed.
    """

    def __init__(
        self,
        event_loop: str,
        agent_address: ZMQConfig,
        storage_address: ObjectStorageConfig,
        preload: Optional[str],
        garbage_collect_interval_seconds: int,
        trim_memory_threshold_bytes: int,
        hard_suspend: bool,
        logging_paths: Tuple[str, ...],
        logging_level: str,
    ):
        """Creates and immediately starts the processor process; all settings are forwarded to `Processor`."""
        self._processor_id: Optional[ProcessorID] = None
        self._task: Optional[Task] = None
        self._suspended = False
        self._hard_suspend = hard_suspend

        # The synchronization events are only needed for soft suspension.
        self._resume_event = None
        self._resumed_event = None
        if not hard_suspend:
            spawn_context = multiprocessing.get_context("spawn")
            self._resume_event = spawn_context.Event()
            self._resumed_event = spawn_context.Event()

        processor_settings = dict(
            event_loop=event_loop,
            agent_address=agent_address,
            storage_address=storage_address,
            preload=preload,
            resume_event=self._resume_event,
            resumed_event=self._resumed_event,
            garbage_collect_interval_seconds=garbage_collect_interval_seconds,
            trim_memory_threshold_bytes=trim_memory_threshold_bytes,
            logging_paths=logging_paths,
            logging_level=logging_level,
        )
        self._processor = Processor(**processor_settings)
        self._processor.start()
        self._process = psutil.Process(self._processor.pid)

    def pid(self) -> int:
        """Returns the process id of the processor."""
        process_id = self._processor.pid
        assert process_id is not None
        return process_id

    def process(self) -> psutil.Process:
        """Returns the `psutil.Process` created for the processor's pid."""
        return self._process

    def processor_id(self) -> ProcessorID:
        """Returns the processor id; `initialize()` must have been called before."""
        identifier = self._processor_id
        assert identifier is not None
        return identifier

    def initialized(self) -> bool:
        """Tells whether a processor id has been assigned with `initialize()`."""
        return not (self._processor_id is None)

    def initialize(self, processor_id: ProcessorID):
        """Assigns the processor id."""
        self._processor_id = processor_id

    def task(self) -> Optional[Task]:
        """Returns the task currently associated with the processor, if any."""
        return self._task

    def set_task(self, task: Optional[Task]):
        """Associates `task` with the processor; None clears the association."""
        self._task = task

    def suspended(self) -> bool:
        """Tells whether the processor is currently suspended."""
        return self._suspended

    def suspend(self):
        """Suspends the processor. It must be initialized, have a task and not be suspended already."""
        assert self._processor is not None
        assert self._task is not None
        assert self._suspended is False
        assert self.initialized()

        if self._hard_suspend:
            signal_name = "SIGSTOP"
        else:
            # Soft suspension keeps the process running (e.g. to keep network links alive): the process is asked to
            # wait on a synchronization event, which blocks its main thread while its helper threads keep going.
            #
            # See https://github.com/Citi/scaler/issues/14

            assert self._resume_event is not None
            assert self._resumed_event is not None
            self._resume_event.clear()
            self._resumed_event.clear()

            signal_name = SUSPEND_SIGNAL

        self.__send_signal(signal_name)
        self._suspended = True

    def resume(self):
        """Resumes a suspended processor. It must have a task and be suspended."""
        assert self._task is not None
        assert self._suspended is True

        if not self._hard_suspend:
            assert self._resume_event is not None
            assert self._resumed_event is not None

            self._resume_event.set()

            # Block until the processor acknowledges that it left its `_resume_event.wait()` call. A later
            # `suspend()` issued before that point would make the processor wait on the event re-entrantly
            # (causes a re-entrant error on Linux).
            self._resumed_event.wait()
        else:
            self.__send_signal("SIGCONT")

        self._suspended = False

    def kill(self):
        """Terminates the processor with SIGTERM, escalating to SIGKILL if it does not exit in time."""
        self.__send_signal("SIGTERM")
        self._processor.join(DEFAULT_PROCESSOR_KILL_DELAY_SECONDS)

        still_running = self._processor.exitcode is None
        if still_running:
            # TODO: some processors fail to interrupt because of a blocking 0mq call. Ideally we should interrupt
            # these blocking calls instead of sending a SIGKILL signal.

            logging.warning(f"Processor[{self.pid()}] does not terminate in time, send SIGKILL.")
            self.__send_signal("SIGKILL")
            self._processor.join()

        self.set_task(None)

    def __send_signal(self, signal_name: str):
        """Sends the signal named `signal_name` to the processor process.

        :raises RuntimeError: when the `signal` module does not define `signal_name` on this platform.
        """
        signal_instance = getattr(signal, signal_name, None)
        if signal_instance is not None:
            os.kill(self.pid(), signal_instance)
            return

        raise RuntimeError(f"unsupported platform, signal not available: {signal_name}.")