opengris-scaler 1.12.37__cp38-cp38-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. opengris_scaler-1.12.37.dist-info/METADATA +730 -0
  2. opengris_scaler-1.12.37.dist-info/RECORD +196 -0
  3. opengris_scaler-1.12.37.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.37.dist-info/entry_points.txt +10 -0
  5. opengris_scaler-1.12.37.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.37.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.37.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/__init__.py +14 -0
  13. scaler/about.py +5 -0
  14. scaler/client/__init__.py +0 -0
  15. scaler/client/agent/__init__.py +0 -0
  16. scaler/client/agent/client_agent.py +218 -0
  17. scaler/client/agent/disconnect_manager.py +27 -0
  18. scaler/client/agent/future_manager.py +112 -0
  19. scaler/client/agent/heartbeat_manager.py +74 -0
  20. scaler/client/agent/mixins.py +89 -0
  21. scaler/client/agent/object_manager.py +98 -0
  22. scaler/client/agent/task_manager.py +64 -0
  23. scaler/client/client.py +672 -0
  24. scaler/client/future.py +252 -0
  25. scaler/client/object_buffer.py +129 -0
  26. scaler/client/object_reference.py +25 -0
  27. scaler/client/serializer/__init__.py +0 -0
  28. scaler/client/serializer/default.py +16 -0
  29. scaler/client/serializer/mixins.py +38 -0
  30. scaler/cluster/__init__.py +0 -0
  31. scaler/cluster/cluster.py +95 -0
  32. scaler/cluster/combo.py +157 -0
  33. scaler/cluster/object_storage_server.py +45 -0
  34. scaler/cluster/scheduler.py +86 -0
  35. scaler/config/__init__.py +0 -0
  36. scaler/config/common/__init__.py +0 -0
  37. scaler/config/common/logging.py +41 -0
  38. scaler/config/common/web.py +18 -0
  39. scaler/config/common/worker.py +65 -0
  40. scaler/config/common/worker_adapter.py +28 -0
  41. scaler/config/config_class.py +317 -0
  42. scaler/config/defaults.py +94 -0
  43. scaler/config/mixins.py +20 -0
  44. scaler/config/section/__init__.py +0 -0
  45. scaler/config/section/cluster.py +66 -0
  46. scaler/config/section/ecs_worker_adapter.py +78 -0
  47. scaler/config/section/native_worker_adapter.py +30 -0
  48. scaler/config/section/object_storage_server.py +13 -0
  49. scaler/config/section/scheduler.py +126 -0
  50. scaler/config/section/symphony_worker_adapter.py +35 -0
  51. scaler/config/section/top.py +16 -0
  52. scaler/config/section/webui.py +16 -0
  53. scaler/config/types/__init__.py +0 -0
  54. scaler/config/types/network_backend.py +12 -0
  55. scaler/config/types/object_storage_server.py +45 -0
  56. scaler/config/types/worker.py +67 -0
  57. scaler/config/types/zmq.py +83 -0
  58. scaler/entry_points/__init__.py +0 -0
  59. scaler/entry_points/cluster.py +10 -0
  60. scaler/entry_points/object_storage_server.py +26 -0
  61. scaler/entry_points/scheduler.py +51 -0
  62. scaler/entry_points/top.py +272 -0
  63. scaler/entry_points/webui.py +6 -0
  64. scaler/entry_points/worker_adapter_ecs.py +22 -0
  65. scaler/entry_points/worker_adapter_native.py +31 -0
  66. scaler/entry_points/worker_adapter_symphony.py +26 -0
  67. scaler/io/__init__.py +0 -0
  68. scaler/io/async_binder.py +89 -0
  69. scaler/io/async_connector.py +95 -0
  70. scaler/io/async_object_storage_connector.py +225 -0
  71. scaler/io/mixins.py +154 -0
  72. scaler/io/sync_connector.py +68 -0
  73. scaler/io/sync_object_storage_connector.py +249 -0
  74. scaler/io/sync_subscriber.py +83 -0
  75. scaler/io/utility.py +80 -0
  76. scaler/io/ymq/__init__.py +0 -0
  77. scaler/io/ymq/_ymq.pyi +95 -0
  78. scaler/io/ymq/_ymq.so +0 -0
  79. scaler/io/ymq/ymq.py +138 -0
  80. scaler/io/ymq_async_object_storage_connector.py +184 -0
  81. scaler/io/ymq_sync_object_storage_connector.py +184 -0
  82. scaler/object_storage/__init__.py +0 -0
  83. scaler/object_storage/object_storage_server.so +0 -0
  84. scaler/protocol/__init__.py +0 -0
  85. scaler/protocol/capnp/__init__.py +0 -0
  86. scaler/protocol/capnp/_python.py +6 -0
  87. scaler/protocol/capnp/common.capnp +68 -0
  88. scaler/protocol/capnp/message.capnp +218 -0
  89. scaler/protocol/capnp/object_storage.capnp +57 -0
  90. scaler/protocol/capnp/status.capnp +73 -0
  91. scaler/protocol/introduction.md +105 -0
  92. scaler/protocol/python/__init__.py +0 -0
  93. scaler/protocol/python/common.py +140 -0
  94. scaler/protocol/python/message.py +751 -0
  95. scaler/protocol/python/mixins.py +13 -0
  96. scaler/protocol/python/object_storage.py +118 -0
  97. scaler/protocol/python/status.py +279 -0
  98. scaler/protocol/worker.md +228 -0
  99. scaler/scheduler/__init__.py +0 -0
  100. scaler/scheduler/allocate_policy/__init__.py +0 -0
  101. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  102. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  103. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  104. scaler/scheduler/allocate_policy/mixins.py +55 -0
  105. scaler/scheduler/controllers/__init__.py +0 -0
  106. scaler/scheduler/controllers/balance_controller.py +65 -0
  107. scaler/scheduler/controllers/client_controller.py +131 -0
  108. scaler/scheduler/controllers/config_controller.py +31 -0
  109. scaler/scheduler/controllers/graph_controller.py +424 -0
  110. scaler/scheduler/controllers/information_controller.py +81 -0
  111. scaler/scheduler/controllers/mixins.py +194 -0
  112. scaler/scheduler/controllers/object_controller.py +147 -0
  113. scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
  114. scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
  115. scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
  116. scaler/scheduler/controllers/scaling_policies/null.py +14 -0
  117. scaler/scheduler/controllers/scaling_policies/types.py +9 -0
  118. scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
  119. scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
  120. scaler/scheduler/controllers/task_controller.py +376 -0
  121. scaler/scheduler/controllers/worker_controller.py +169 -0
  122. scaler/scheduler/object_usage/__init__.py +0 -0
  123. scaler/scheduler/object_usage/object_tracker.py +131 -0
  124. scaler/scheduler/scheduler.py +251 -0
  125. scaler/scheduler/task/__init__.py +0 -0
  126. scaler/scheduler/task/task_state_machine.py +92 -0
  127. scaler/scheduler/task/task_state_manager.py +61 -0
  128. scaler/ui/__init__.py +0 -0
  129. scaler/ui/common/__init__.py +0 -0
  130. scaler/ui/common/constants.py +9 -0
  131. scaler/ui/common/live_display.py +147 -0
  132. scaler/ui/common/memory_window.py +146 -0
  133. scaler/ui/common/setting_page.py +40 -0
  134. scaler/ui/common/task_graph.py +840 -0
  135. scaler/ui/common/task_log.py +111 -0
  136. scaler/ui/common/utility.py +66 -0
  137. scaler/ui/common/webui.py +80 -0
  138. scaler/ui/common/worker_processors.py +104 -0
  139. scaler/ui/v1.py +76 -0
  140. scaler/ui/v2.py +102 -0
  141. scaler/ui/webui.py +21 -0
  142. scaler/utility/__init__.py +0 -0
  143. scaler/utility/debug.py +19 -0
  144. scaler/utility/event_list.py +63 -0
  145. scaler/utility/event_loop.py +58 -0
  146. scaler/utility/exceptions.py +42 -0
  147. scaler/utility/formatter.py +44 -0
  148. scaler/utility/graph/__init__.py +0 -0
  149. scaler/utility/graph/optimization.py +27 -0
  150. scaler/utility/graph/topological_sorter.py +11 -0
  151. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  152. scaler/utility/identifiers.py +107 -0
  153. scaler/utility/logging/__init__.py +0 -0
  154. scaler/utility/logging/decorators.py +25 -0
  155. scaler/utility/logging/scoped_logger.py +33 -0
  156. scaler/utility/logging/utility.py +183 -0
  157. scaler/utility/many_to_many_dict.py +123 -0
  158. scaler/utility/metadata/__init__.py +0 -0
  159. scaler/utility/metadata/profile_result.py +31 -0
  160. scaler/utility/metadata/task_flags.py +30 -0
  161. scaler/utility/mixins.py +13 -0
  162. scaler/utility/network_util.py +7 -0
  163. scaler/utility/one_to_many_dict.py +72 -0
  164. scaler/utility/queues/__init__.py +0 -0
  165. scaler/utility/queues/async_indexed_queue.py +37 -0
  166. scaler/utility/queues/async_priority_queue.py +70 -0
  167. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  168. scaler/utility/queues/indexed_queue.py +114 -0
  169. scaler/utility/serialization.py +9 -0
  170. scaler/version.txt +1 -0
  171. scaler/worker/__init__.py +0 -0
  172. scaler/worker/agent/__init__.py +0 -0
  173. scaler/worker/agent/heartbeat_manager.py +110 -0
  174. scaler/worker/agent/mixins.py +137 -0
  175. scaler/worker/agent/processor/__init__.py +0 -0
  176. scaler/worker/agent/processor/object_cache.py +107 -0
  177. scaler/worker/agent/processor/processor.py +285 -0
  178. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  179. scaler/worker/agent/processor_holder.py +147 -0
  180. scaler/worker/agent/processor_manager.py +369 -0
  181. scaler/worker/agent/profiling_manager.py +109 -0
  182. scaler/worker/agent/task_manager.py +150 -0
  183. scaler/worker/agent/timeout_manager.py +19 -0
  184. scaler/worker/preload.py +84 -0
  185. scaler/worker/worker.py +265 -0
  186. scaler/worker_adapter/__init__.py +0 -0
  187. scaler/worker_adapter/common.py +26 -0
  188. scaler/worker_adapter/ecs.py +241 -0
  189. scaler/worker_adapter/native.py +138 -0
  190. scaler/worker_adapter/symphony/__init__.py +0 -0
  191. scaler/worker_adapter/symphony/callback.py +45 -0
  192. scaler/worker_adapter/symphony/heartbeat_manager.py +82 -0
  193. scaler/worker_adapter/symphony/message.py +24 -0
  194. scaler/worker_adapter/symphony/task_manager.py +289 -0
  195. scaler/worker_adapter/symphony/worker.py +204 -0
  196. scaler/worker_adapter/symphony/worker_adapter.py +123 -0
@@ -0,0 +1,114 @@
1
+ import dataclasses
2
+ from typing import Any, Dict, Hashable, Optional
3
+
4
+
5
@dataclasses.dataclass
class _Node:
    """One cell of the doubly linked list that backs ``IndexedQueue``."""

    value: Any
    prev: Optional["_Node"] = None  # neighbour towards the head (inserted later)
    next: Optional["_Node"] = None  # neighbour towards the tail (inserted earlier)


class _DoubleLinkedList:
    """Minimal doubly linked list: nodes enter at the head and leave at the tail.

    Nodes can also be unlinked from any position in O(1) given the node itself.
    """

    def __init__(self):
        self._head: Optional[_Node] = None
        self._tail: Optional[_Node] = None
        self._size = 0

    def __len__(self):
        return self._size

    def add_to_head(self, node: _Node):
        """Link ``node`` in front of the current head."""
        old_head = self._head
        if old_head is None:
            # first element: it is both head and tail
            self._tail = node
        else:
            node.next = old_head
            old_head.prev = node

        self._head = node
        self._size += 1

    def remove_tail(self):
        """Unlink and return the tail node.

        Raises:
            IndexError: if the list is empty.
        """
        node = self._tail
        if node is None:
            raise IndexError(f"{self.__class__.__name__} queue empty")

        new_tail = node.prev
        if new_tail is None:
            self._head = None
            self._tail = None
        else:
            new_tail.next = None
            self._tail = new_tail

        self._size -= 1
        return node

    def remove(self, node: _Node):
        """Unlink ``node`` from wherever it sits in the list.

        ``node`` must currently belong to this list; this is only sanity-checked
        with assertions at the list ends.
        """
        prev_node, next_node = node.prev, node.next

        # A node lacking a neighbour on one side has to be the list end on that side.
        # Check before mutating anything so a failed check leaves the list untouched.
        if prev_node is None:
            assert self._head is node
        if next_node is None:
            assert self._tail is node

        if prev_node is None:
            self._head = next_node
        else:
            prev_node.next = next_node

        if next_node is None:
            self._tail = prev_node
        else:
            next_node.prev = prev_node

        self._size -= 1


class IndexedQueue:
    """A queue that provides O(1) operations for adding and removing any item.

    Items come out of ``get`` (and iteration) in the order they were ``put``.
    Each item may be present at most once.
    """

    def __init__(self):
        self._double_linked_list = _DoubleLinkedList()
        # Keyed by the item itself rather than by ``hash(item)``: distinct items may
        # share a hash value (e.g. ``hash(-1) == hash(-2)`` in CPython), and the dict
        # already resolves such collisions through equality.
        self._hash_map: Dict[Hashable, _Node] = {}

    def __contains__(self, item: Hashable):
        return item in self._hash_map

    def __len__(self):
        return len(self._double_linked_list)

    def __iter__(self):
        """Yield the queued items from oldest to newest without removing them."""
        node = self._double_linked_list._tail
        while node is not None:
            yield node.value
            node = node.prev

    def put(self, item: Hashable):
        """Append ``item`` to the queue.

        Raises:
            KeyError: if an equal item is already queued.
        """
        if item in self._hash_map:
            raise KeyError(f"{self.__class__.__name__} already have item: {item}")

        node = _Node(item)
        self._double_linked_list.add_to_head(node)
        self._hash_map[item] = node

    def get(self):
        """Remove and return the oldest item.

        Raises:
            IndexError: if the queue is empty.
        """
        node = self._double_linked_list.remove_tail()
        del self._hash_map[node.value]
        return node.value

    def remove(self, item: Hashable):
        """Remove ``item`` from any position in the queue.

        Raises:
            ValueError: if ``item`` is not queued.
        """
        if item not in self._hash_map:
            raise ValueError(f"{self.__class__.__name__} doesn't have item: {item}")

        node = self._hash_map.pop(item)
        self._double_linked_list.remove(node)
@@ -0,0 +1,9 @@
1
+ import pickle
2
+
3
+
4
def serialize_failure(exp: Exception) -> bytes:
    """Pickle the exception ``exp`` with the highest available pickle protocol."""
    payload = pickle.dumps(exp, protocol=pickle.HIGHEST_PROTOCOL)
    return payload
6
+
7
+
8
def deserialize_failure(result: bytes) -> Exception:
    """Unpickle ``result`` and return the resulting object.

    NOTE(review): ``pickle.loads`` can execute arbitrary code while loading, so
    ``result`` must only ever come from a trusted peer; confirm against callers.
    """
    failure = pickle.loads(result)
    return failure
scaler/version.txt ADDED
@@ -0,0 +1 @@
1
+ 1.12.37
File without changes
File without changes
@@ -0,0 +1,110 @@
1
+ import time
2
+ from typing import Dict, Optional
3
+
4
+ import psutil
5
+
6
+ from scaler.config.types.object_storage_server import ObjectStorageAddressConfig
7
+ from scaler.io.mixins import AsyncConnector, AsyncObjectStorageConnector
8
+ from scaler.protocol.python.message import Resource, WorkerHeartbeat, WorkerHeartbeatEcho
9
+ from scaler.protocol.python.status import ProcessorStatus
10
+ from scaler.utility.mixins import Looper
11
+ from scaler.worker.agent.mixins import HeartbeatManager, ProcessorManager, TaskManager, TimeoutManager
12
+ from scaler.worker.agent.processor_holder import ProcessorHolder
13
+
14
+
15
class VanillaHeartbeatManager(Looper, HeartbeatManager):
    """Worker-side heartbeat sender.

    ``routine`` sends one ``WorkerHeartbeat`` over the external connector and then
    holds back: no further heartbeat is sent until ``on_heartbeat_echo`` has handled
    a ``WorkerHeartbeatEcho``. The echo is also used to estimate latency and, when no
    object storage address was given at construction, to learn that address and
    connect the storage connector to it.
    """

    def __init__(
        self,
        object_storage_address: Optional[ObjectStorageAddressConfig],
        capabilities: Dict[str, int],
        task_queue_size: int,
    ):
        """Store the static heartbeat fields; collaborators are attached later via ``register``.

        Args:
            object_storage_address: known object storage address, or ``None`` to take
                it from the first heartbeat echo.
            capabilities: forwarded unchanged in every heartbeat.
            task_queue_size: forwarded unchanged in every heartbeat.
        """
        self._agent_process = psutil.Process()
        self._capabilities = capabilities
        self._task_queue_size = task_queue_size

        self._connector_external: Optional[AsyncConnector] = None
        self._connector_storage: Optional[AsyncObjectStorageConnector] = None
        self._worker_task_manager: Optional[TaskManager] = None
        self._timeout_manager: Optional[TimeoutManager] = None
        self._processor_manager: Optional[ProcessorManager] = None

        # 0 means "no heartbeat in flight"; otherwise the send time of the pending one
        self._start_timestamp_ns = 0
        self._latency_us = 0

        self._object_storage_address: Optional[ObjectStorageAddressConfig] = object_storage_address

    def register(
        self,
        connector_external: AsyncConnector,
        connector_storage: AsyncObjectStorageConnector,
        worker_task_manager: TaskManager,
        timeout_manager: TimeoutManager,
        processor_manager: ProcessorManager,
    ):
        """Attach the connectors and sibling managers used by ``routine`` and ``on_heartbeat_echo``."""
        self._connector_external = connector_external
        self._connector_storage = connector_storage
        self._worker_task_manager = worker_task_manager
        self._timeout_manager = timeout_manager
        self._processor_manager = processor_manager

    async def on_heartbeat_echo(self, heartbeat: WorkerHeartbeatEcho):
        """Handle the echo of the pending heartbeat.

        Updates the latency estimate (half of the elapsed time since the send, in
        microseconds), refreshes the timeout manager, and on the first echo without a
        configured object storage address adopts the address from the echo and
        connects the storage connector to it.
        """
        if self._start_timestamp_ns == 0:
            # not handling echo if we didn't send out heartbeat
            return

        self._latency_us = int(((time.time_ns() - self._start_timestamp_ns) / 2) // 1_000)
        self._start_timestamp_ns = 0
        self._timeout_manager.update_last_seen_time()

        if self._object_storage_address is None:
            address_message = heartbeat.object_storage_address()
            self._object_storage_address = ObjectStorageAddressConfig(address_message.host, address_message.port)
            await self._connector_storage.connect(self._object_storage_address.host, self._object_storage_address.port)

    async def routine(self):
        """Report failed processors and send one heartbeat, unless one is still awaiting its echo."""
        if self._start_timestamp_ns != 0:
            # already sent heartbeat, expecting heartbeat echo, so not sending
            return

        for processor_holder in self._processor_manager.processors():
            status = processor_holder.process().status()
            if status in {psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD}:
                await self._processor_manager.on_failing_processor(processor_holder.processor_id(), status)

        processors = self._processor_manager.processors()  # refreshes for removed dead and zombie processors
        num_suspended_processors = self._processor_manager.num_suspended_processors()

        # TODO: add task queue size to WorkerHeartbeat
        await self._connector_external.send(
            WorkerHeartbeat.new_msg(
                Resource.new_msg(int(self._agent_process.cpu_percent() * 10), self._agent_process.memory_info().rss),
                psutil.virtual_memory().available,
                self._task_queue_size,
                self._worker_task_manager.get_queued_size() - num_suspended_processors,
                self._latency_us,
                self._processor_manager.can_accept_task(),
                [self.__get_processor_status_from_holder(processor) for processor in processors],
                self._capabilities,
            )
        )
        self._start_timestamp_ns = time.time_ns()

    def get_object_storage_address(self) -> Optional[ObjectStorageAddressConfig]:
        """Return the object storage address, or ``None`` while it is still unknown."""
        return self._object_storage_address

    @staticmethod
    def __get_processor_status_from_holder(processor: ProcessorHolder) -> ProcessorStatus:
        """Build the ``ProcessorStatus`` entry for one processor.

        A processor whose process has already gone is reported with zero resource
        usage rather than failing the whole heartbeat.
        """
        process = processor.process()

        try:
            resource = Resource.new_msg(int(process.cpu_percent() * 10), process.memory_info().rss)
        except psutil.NoSuchProcess:
            # Assumes dead processes do not use any resources.
            # psutil.ZombieProcess is a subclass of NoSuchProcess, so this covers both a
            # zombie and a process that has already been reaped.
            resource = Resource.new_msg(0, 0)

        return ProcessorStatus.new_msg(
            processor.pid(), processor.initialized(), processor.task() is not None, processor.suspended(), resource
        )
@@ -0,0 +1,137 @@
1
+ import abc
2
+ from typing import List, Optional
3
+
4
+ from scaler.config.types.object_storage_server import ObjectStorageAddressConfig
5
+ from scaler.protocol.python.message import (
6
+ ObjectInstruction,
7
+ ProcessorInitialized,
8
+ Task,
9
+ TaskCancel,
10
+ TaskResult,
11
+ WorkerHeartbeatEcho,
12
+ )
13
+ from scaler.utility.identifiers import ProcessorID, TaskID
14
+ from scaler.utility.metadata.profile_result import ProfileResult
15
+ from scaler.worker.agent.processor_holder import ProcessorHolder
16
+
17
+
18
class HeartbeatManager(metaclass=abc.ABCMeta):
    """Abstract interface of the worker agent's heartbeat component."""

    @abc.abstractmethod
    async def on_heartbeat_echo(self, heartbeat: WorkerHeartbeatEcho):
        """Process a ``WorkerHeartbeatEcho`` message."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_object_storage_address(self) -> Optional[ObjectStorageAddressConfig]:
        """Return the object storage address, or ``None`` if it is not known yet.

        The return type is ``Optional`` because an implementation may only learn the
        address from a heartbeat echo; callers must handle ``None``.
        """
        raise NotImplementedError()
26
+
27
+
28
class TimeoutManager(metaclass=abc.ABCMeta):
    """Abstract interface of the worker agent's timeout tracking component."""

    @abc.abstractmethod
    def update_last_seen_time(self):
        """Record a new "last seen" moment; must be overridden by implementations."""
        raise NotImplementedError
32
+
33
+
34
class TaskManager(metaclass=abc.ABCMeta):
    """Abstract interface of the worker agent's task bookkeeping component.

    Every method is abstract; concrete behaviour lives in the implementations.
    """

    @abc.abstractmethod
    async def on_task_new(self, task: Task):
        """Coroutine hook receiving a ``Task`` message."""
        raise NotImplementedError

    @abc.abstractmethod
    async def on_task_result(self, result: TaskResult):
        """Coroutine hook receiving a ``TaskResult`` message."""
        raise NotImplementedError

    @abc.abstractmethod
    def on_cancel_task(self, task_cancel: TaskCancel):
        """Synchronous hook receiving a ``TaskCancel`` message."""
        raise NotImplementedError

    @abc.abstractmethod
    def get_queued_size(self):
        """Return the queued size reported by the implementation."""
        raise NotImplementedError
50
+
51
+
52
class ProcessorManager(metaclass=abc.ABCMeta):
    """Abstract interface of the component that owns the worker's processors.

    Every method is abstract; the docstrings below only restate what the
    signatures declare, concrete behaviour lives in the implementations.
    """

    @abc.abstractmethod
    def can_accept_task(self) -> bool:
        """Return whether a task can currently be accepted."""
        raise NotImplementedError

    @abc.abstractmethod
    async def wait_until_can_accept_task(self):
        """Coroutine counterpart of ``can_accept_task`` that is awaited by callers."""
        raise NotImplementedError

    @abc.abstractmethod
    async def on_processor_initialized(self, processor_id: ProcessorID, processor_initialized: ProcessorInitialized):
        """Coroutine hook receiving a ``ProcessorInitialized`` message for ``processor_id``."""
        raise NotImplementedError

    @abc.abstractmethod
    async def on_task(self, task: Task) -> bool:
        """Coroutine hook receiving a ``Task``; returns a success flag."""
        raise NotImplementedError

    @abc.abstractmethod
    async def on_cancel_task(self, task_id: TaskID) -> Optional[Task]:
        """Coroutine hook cancelling ``task_id``; returns a ``Task`` or ``None``."""
        raise NotImplementedError

    @abc.abstractmethod
    async def on_failing_processor(self, processor_id: ProcessorID, process_status: str):
        """Coroutine hook reporting that ``processor_id`` was found in ``process_status``."""
        raise NotImplementedError

    @abc.abstractmethod
    async def on_suspend_task(self, task_id: TaskID) -> bool:
        """Coroutine hook suspending ``task_id``; returns a success flag."""
        raise NotImplementedError

    @abc.abstractmethod
    def on_resume_task(self, task_id: TaskID) -> bool:
        """Synchronous hook resuming ``task_id``; returns a success flag."""
        raise NotImplementedError

    @abc.abstractmethod
    async def on_task_result(self, processor_id: ProcessorID, task_result: TaskResult):
        """Coroutine hook receiving a ``TaskResult`` produced by ``processor_id``."""
        raise NotImplementedError

    @abc.abstractmethod
    async def on_external_object_instruction(self, instruction: ObjectInstruction):
        """Coroutine hook receiving an ``ObjectInstruction`` that carries no processor id."""
        raise NotImplementedError

    @abc.abstractmethod
    async def on_internal_object_instruction(self, processor_id: ProcessorID, instruction: ObjectInstruction):
        """Coroutine hook receiving an ``ObjectInstruction`` associated with ``processor_id``."""
        raise NotImplementedError

    @abc.abstractmethod
    def destroy(self, reason: str):
        """Tear the manager down; ``reason`` is a free-form explanation string."""
        raise NotImplementedError

    @abc.abstractmethod
    def current_processor_is_initialized(self) -> bool:
        """Return whether the current processor is initialized."""
        raise NotImplementedError

    @abc.abstractmethod
    def current_task(self) -> Optional[Task]:
        """Return the current ``Task``, or ``None`` when there is none."""
        raise NotImplementedError

    @abc.abstractmethod
    def current_task_id(self) -> TaskID:
        """Return the ``TaskID`` of the current task."""
        raise NotImplementedError

    @abc.abstractmethod
    def processors(self) -> List[ProcessorHolder]:
        """Return the list of ``ProcessorHolder`` objects managed by this instance."""
        raise NotImplementedError

    @abc.abstractmethod
    def num_suspended_processors(self) -> int:
        """Return the number of suspended processors."""
        raise NotImplementedError
120
+
121
+
122
class ProfilingManager(metaclass=abc.ABCMeta):
    """Abstract interface of the worker agent's profiling component.

    Every method is abstract; concrete behaviour lives in the implementations.
    """

    @abc.abstractmethod
    def on_process_start(self, pid: int):
        """Hook called with the ``pid`` of a process that started."""
        raise NotImplementedError

    @abc.abstractmethod
    def on_process_end(self, pid: int):
        """Hook called with the ``pid`` of a process that ended."""
        raise NotImplementedError

    @abc.abstractmethod
    def on_task_start(self, pid: int, task_id: TaskID):
        """Hook called when ``task_id`` starts on process ``pid``."""
        raise NotImplementedError

    @abc.abstractmethod
    def on_task_end(self, pid: int, task_id: TaskID) -> ProfileResult:
        """Hook called when ``task_id`` ends on process ``pid``; returns its ``ProfileResult``."""
        raise NotImplementedError
File without changes
@@ -0,0 +1,107 @@
1
+ import ctypes
2
+ import gc
3
+ import logging
4
+ import multiprocessing
5
+ import platform
6
+ import threading
7
+ import time
8
+ from typing import Any, Dict, Optional
9
+
10
+ import cloudpickle
11
+ import psutil
12
+
13
+ from scaler.client.serializer.mixins import Serializer
14
+ from scaler.config.defaults import CLEANUP_INTERVAL_SECONDS
15
+ from scaler.utility.exceptions import DeserializeObjectError
16
+ from scaler.utility.identifiers import ClientID, ObjectID
17
+
18
+
19
class ObjectCache(threading.Thread):
    """Per-process cache of deserialized objects and per-client serializers.

    The object doubles as a background thread: while running it wakes up every
    ``CLEANUP_INTERVAL_SECONDS``, and at most once per
    ``garbage_collect_interval_seconds`` runs a garbage collection followed, when the
    process RSS has reached ``trim_memory_threshold_bytes``, by ``malloc_trim(0)``.
    When the thread stops, all cached objects are dropped.
    """

    def __init__(self, garbage_collect_interval_seconds: int, trim_memory_threshold_bytes: int):
        """Create the cache; call ``start()`` to launch the cleanup thread.

        Args:
            garbage_collect_interval_seconds: minimum time between two cleanup passes.
            trim_memory_threshold_bytes: RSS at or above which memory is trimmed.
        """
        threading.Thread.__init__(self)

        self._serializers: Dict[ClientID, Serializer] = dict()

        self._garbage_collect_interval_seconds = garbage_collect_interval_seconds
        self._previous_garbage_collect_time = time.time()
        self._trim_memory_threshold_bytes = trim_memory_threshold_bytes

        self._cached_objects: Dict[ObjectID, Any] = {}
        self._cached_objects_alive_since: Dict[ObjectID, float] = dict()
        self._process = psutil.Process(multiprocessing.current_process().pid)

        # libc is only needed for the optional malloc_trim() call. Neither the library
        # file name nor the symbol is guaranteed to exist (malloc_trim is a glibc
        # extension), so both lookups are best effort and trimming is simply skipped
        # when they fail.
        libc_name = "libc.{}".format("so.6" if platform.uname()[0] != "Darwin" else "dylib")
        try:
            self._libc = ctypes.cdll.LoadLibrary(libc_name)
        except OSError:
            self._libc = None
        self._malloc_trim = getattr(self._libc, "malloc_trim", None)

        self._stop_event = threading.Event()

    def run(self) -> None:
        """Thread body: clean up periodically until ``destroy`` is called, then empty the cache."""
        try:
            while not self._stop_event.wait(timeout=CLEANUP_INTERVAL_SECONDS):
                self.__clean_memory()
        finally:
            self.__clear()  # gracefully destroy all cached objects

    def destroy(self) -> None:
        """Ask the cleanup thread to stop; does not wait for it to finish."""
        self._stop_event.set()

    def add_serializer(self, client: ClientID, serializer: Serializer):
        """Register (or replace) the serializer used for ``client``."""
        self._serializers[client] = serializer

    def serialize(self, client: ClientID, obj: Any) -> bytes:
        """Serialize ``obj`` with the serializer of ``client``.

        Raises:
            DeserializeObjectError: if no serializer is registered for ``client``.
        """
        return self.get_serializer(client).serialize(obj)

    def deserialize(self, client: ClientID, payload: bytes) -> Any:
        """Deserialize ``payload`` with the serializer of ``client``.

        Raises:
            DeserializeObjectError: if no serializer is registered for ``client``.
        """
        return self.get_serializer(client).deserialize(payload)

    def add_object(self, client: ClientID, object_id: ObjectID, object_bytes: bytes) -> None:
        """Store the object received as ``object_bytes`` under ``object_id``.

        A serializer object is unpickled with ``cloudpickle`` and registered for
        ``client``; any other object is deserialized with the client's serializer and
        cached. If deserialization fails the error is logged and nothing is cached,
        so a later ``get_object`` for that id raises ``ValueError``.

        NOTE(review): ``cloudpickle.loads`` can execute arbitrary code; this relies on
        ``object_bytes`` coming from a trusted source -- confirm against callers.
        """
        if object_id.is_serializer():
            self.add_serializer(client, cloudpickle.loads(object_bytes))
            return

        try:
            deserialized = self.deserialize(client, object_bytes)
        except Exception:  # noqa
            logging.exception(f"failed to deserialize received {object_id!r}, length={len(object_bytes)}")
            # Nothing usable to cache; falling through would hit an unbound local.
            return

        self._cached_objects[object_id] = deserialized
        self._cached_objects_alive_since[object_id] = time.time()

    def del_object(self, object_id: ObjectID):
        """Drop ``object_id`` from the cache; unknown ids are ignored."""
        self._cached_objects_alive_since.pop(object_id, None)
        self._cached_objects.pop(object_id, None)

    def has_object(self, object_id: ObjectID):
        """Return whether ``object_id`` is a key of the object cache or of the serializer map."""
        # NOTE(review): ``_serializers`` is annotated as keyed by ClientID, so the second
        # membership test compares an ObjectID against client ids -- confirm this is intended.
        return object_id in self._cached_objects or object_id in self._serializers

    def get_object(self, object_id: ObjectID) -> Optional[Any]:
        """Return the cached object for ``object_id`` and refresh its last-access time.

        Raises:
            ValueError: if ``object_id`` is not cached.
        """
        if object_id not in self._cached_objects:
            raise ValueError(f"cannot get object for {object_id!r}")

        obj = self._cached_objects[object_id]

        self._cached_objects_alive_since[object_id] = time.time()
        return obj

    def get_serializer(self, client: ClientID) -> Serializer:
        """Return the serializer registered for ``client``.

        Raises:
            DeserializeObjectError: if no serializer is registered for ``client``.
        """
        serializer = self._serializers.get(client)

        if serializer is None:
            raise DeserializeObjectError(f"cannot get serializer for {client!r}")

        return serializer

    def __clean_memory(self):
        """Run one rate-limited cleanup pass: garbage collect, then trim memory if RSS is high."""
        if time.time() - self._previous_garbage_collect_time < self._garbage_collect_interval_seconds:
            return

        self._previous_garbage_collect_time = time.time()

        gc.collect()

        if self._malloc_trim is None:
            # malloc_trim is unavailable in this libc; an unguarded call would raise in the
            # cleanup thread and make ``run`` wipe the whole cache on its way out.
            return

        if self._process.memory_info().rss < self._trim_memory_threshold_bytes:
            return

        self._malloc_trim(0)

    def __clear(self) -> None:
        """Drop every cached object together with its last-access timestamp."""
        self._cached_objects.clear()
        self._cached_objects_alive_since.clear()