opengris-scaler 1.12.37__cp38-cp38-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opengris_scaler-1.12.37.dist-info/METADATA +730 -0
- opengris_scaler-1.12.37.dist-info/RECORD +196 -0
- opengris_scaler-1.12.37.dist-info/WHEEL +5 -0
- opengris_scaler-1.12.37.dist-info/entry_points.txt +10 -0
- opengris_scaler-1.12.37.dist-info/licenses/LICENSE +201 -0
- opengris_scaler-1.12.37.dist-info/licenses/LICENSE.spdx +7 -0
- opengris_scaler-1.12.37.dist-info/licenses/NOTICE +8 -0
- opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
- opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
- opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
- opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
- scaler/__init__.py +14 -0
- scaler/about.py +5 -0
- scaler/client/__init__.py +0 -0
- scaler/client/agent/__init__.py +0 -0
- scaler/client/agent/client_agent.py +218 -0
- scaler/client/agent/disconnect_manager.py +27 -0
- scaler/client/agent/future_manager.py +112 -0
- scaler/client/agent/heartbeat_manager.py +74 -0
- scaler/client/agent/mixins.py +89 -0
- scaler/client/agent/object_manager.py +98 -0
- scaler/client/agent/task_manager.py +64 -0
- scaler/client/client.py +672 -0
- scaler/client/future.py +252 -0
- scaler/client/object_buffer.py +129 -0
- scaler/client/object_reference.py +25 -0
- scaler/client/serializer/__init__.py +0 -0
- scaler/client/serializer/default.py +16 -0
- scaler/client/serializer/mixins.py +38 -0
- scaler/cluster/__init__.py +0 -0
- scaler/cluster/cluster.py +95 -0
- scaler/cluster/combo.py +157 -0
- scaler/cluster/object_storage_server.py +45 -0
- scaler/cluster/scheduler.py +86 -0
- scaler/config/__init__.py +0 -0
- scaler/config/common/__init__.py +0 -0
- scaler/config/common/logging.py +41 -0
- scaler/config/common/web.py +18 -0
- scaler/config/common/worker.py +65 -0
- scaler/config/common/worker_adapter.py +28 -0
- scaler/config/config_class.py +317 -0
- scaler/config/defaults.py +94 -0
- scaler/config/mixins.py +20 -0
- scaler/config/section/__init__.py +0 -0
- scaler/config/section/cluster.py +66 -0
- scaler/config/section/ecs_worker_adapter.py +78 -0
- scaler/config/section/native_worker_adapter.py +30 -0
- scaler/config/section/object_storage_server.py +13 -0
- scaler/config/section/scheduler.py +126 -0
- scaler/config/section/symphony_worker_adapter.py +35 -0
- scaler/config/section/top.py +16 -0
- scaler/config/section/webui.py +16 -0
- scaler/config/types/__init__.py +0 -0
- scaler/config/types/network_backend.py +12 -0
- scaler/config/types/object_storage_server.py +45 -0
- scaler/config/types/worker.py +67 -0
- scaler/config/types/zmq.py +83 -0
- scaler/entry_points/__init__.py +0 -0
- scaler/entry_points/cluster.py +10 -0
- scaler/entry_points/object_storage_server.py +26 -0
- scaler/entry_points/scheduler.py +51 -0
- scaler/entry_points/top.py +272 -0
- scaler/entry_points/webui.py +6 -0
- scaler/entry_points/worker_adapter_ecs.py +22 -0
- scaler/entry_points/worker_adapter_native.py +31 -0
- scaler/entry_points/worker_adapter_symphony.py +26 -0
- scaler/io/__init__.py +0 -0
- scaler/io/async_binder.py +89 -0
- scaler/io/async_connector.py +95 -0
- scaler/io/async_object_storage_connector.py +225 -0
- scaler/io/mixins.py +154 -0
- scaler/io/sync_connector.py +68 -0
- scaler/io/sync_object_storage_connector.py +249 -0
- scaler/io/sync_subscriber.py +83 -0
- scaler/io/utility.py +80 -0
- scaler/io/ymq/__init__.py +0 -0
- scaler/io/ymq/_ymq.pyi +95 -0
- scaler/io/ymq/_ymq.so +0 -0
- scaler/io/ymq/ymq.py +138 -0
- scaler/io/ymq_async_object_storage_connector.py +184 -0
- scaler/io/ymq_sync_object_storage_connector.py +184 -0
- scaler/object_storage/__init__.py +0 -0
- scaler/object_storage/object_storage_server.so +0 -0
- scaler/protocol/__init__.py +0 -0
- scaler/protocol/capnp/__init__.py +0 -0
- scaler/protocol/capnp/_python.py +6 -0
- scaler/protocol/capnp/common.capnp +68 -0
- scaler/protocol/capnp/message.capnp +218 -0
- scaler/protocol/capnp/object_storage.capnp +57 -0
- scaler/protocol/capnp/status.capnp +73 -0
- scaler/protocol/introduction.md +105 -0
- scaler/protocol/python/__init__.py +0 -0
- scaler/protocol/python/common.py +140 -0
- scaler/protocol/python/message.py +751 -0
- scaler/protocol/python/mixins.py +13 -0
- scaler/protocol/python/object_storage.py +118 -0
- scaler/protocol/python/status.py +279 -0
- scaler/protocol/worker.md +228 -0
- scaler/scheduler/__init__.py +0 -0
- scaler/scheduler/allocate_policy/__init__.py +0 -0
- scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
- scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
- scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
- scaler/scheduler/allocate_policy/mixins.py +55 -0
- scaler/scheduler/controllers/__init__.py +0 -0
- scaler/scheduler/controllers/balance_controller.py +65 -0
- scaler/scheduler/controllers/client_controller.py +131 -0
- scaler/scheduler/controllers/config_controller.py +31 -0
- scaler/scheduler/controllers/graph_controller.py +424 -0
- scaler/scheduler/controllers/information_controller.py +81 -0
- scaler/scheduler/controllers/mixins.py +194 -0
- scaler/scheduler/controllers/object_controller.py +147 -0
- scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
- scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
- scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
- scaler/scheduler/controllers/scaling_policies/null.py +14 -0
- scaler/scheduler/controllers/scaling_policies/types.py +9 -0
- scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
- scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
- scaler/scheduler/controllers/task_controller.py +376 -0
- scaler/scheduler/controllers/worker_controller.py +169 -0
- scaler/scheduler/object_usage/__init__.py +0 -0
- scaler/scheduler/object_usage/object_tracker.py +131 -0
- scaler/scheduler/scheduler.py +251 -0
- scaler/scheduler/task/__init__.py +0 -0
- scaler/scheduler/task/task_state_machine.py +92 -0
- scaler/scheduler/task/task_state_manager.py +61 -0
- scaler/ui/__init__.py +0 -0
- scaler/ui/common/__init__.py +0 -0
- scaler/ui/common/constants.py +9 -0
- scaler/ui/common/live_display.py +147 -0
- scaler/ui/common/memory_window.py +146 -0
- scaler/ui/common/setting_page.py +40 -0
- scaler/ui/common/task_graph.py +840 -0
- scaler/ui/common/task_log.py +111 -0
- scaler/ui/common/utility.py +66 -0
- scaler/ui/common/webui.py +80 -0
- scaler/ui/common/worker_processors.py +104 -0
- scaler/ui/v1.py +76 -0
- scaler/ui/v2.py +102 -0
- scaler/ui/webui.py +21 -0
- scaler/utility/__init__.py +0 -0
- scaler/utility/debug.py +19 -0
- scaler/utility/event_list.py +63 -0
- scaler/utility/event_loop.py +58 -0
- scaler/utility/exceptions.py +42 -0
- scaler/utility/formatter.py +44 -0
- scaler/utility/graph/__init__.py +0 -0
- scaler/utility/graph/optimization.py +27 -0
- scaler/utility/graph/topological_sorter.py +11 -0
- scaler/utility/graph/topological_sorter_graphblas.py +174 -0
- scaler/utility/identifiers.py +107 -0
- scaler/utility/logging/__init__.py +0 -0
- scaler/utility/logging/decorators.py +25 -0
- scaler/utility/logging/scoped_logger.py +33 -0
- scaler/utility/logging/utility.py +183 -0
- scaler/utility/many_to_many_dict.py +123 -0
- scaler/utility/metadata/__init__.py +0 -0
- scaler/utility/metadata/profile_result.py +31 -0
- scaler/utility/metadata/task_flags.py +30 -0
- scaler/utility/mixins.py +13 -0
- scaler/utility/network_util.py +7 -0
- scaler/utility/one_to_many_dict.py +72 -0
- scaler/utility/queues/__init__.py +0 -0
- scaler/utility/queues/async_indexed_queue.py +37 -0
- scaler/utility/queues/async_priority_queue.py +70 -0
- scaler/utility/queues/async_sorted_priority_queue.py +45 -0
- scaler/utility/queues/indexed_queue.py +114 -0
- scaler/utility/serialization.py +9 -0
- scaler/version.txt +1 -0
- scaler/worker/__init__.py +0 -0
- scaler/worker/agent/__init__.py +0 -0
- scaler/worker/agent/heartbeat_manager.py +110 -0
- scaler/worker/agent/mixins.py +137 -0
- scaler/worker/agent/processor/__init__.py +0 -0
- scaler/worker/agent/processor/object_cache.py +107 -0
- scaler/worker/agent/processor/processor.py +285 -0
- scaler/worker/agent/processor/streaming_buffer.py +28 -0
- scaler/worker/agent/processor_holder.py +147 -0
- scaler/worker/agent/processor_manager.py +369 -0
- scaler/worker/agent/profiling_manager.py +109 -0
- scaler/worker/agent/task_manager.py +150 -0
- scaler/worker/agent/timeout_manager.py +19 -0
- scaler/worker/preload.py +84 -0
- scaler/worker/worker.py +265 -0
- scaler/worker_adapter/__init__.py +0 -0
- scaler/worker_adapter/common.py +26 -0
- scaler/worker_adapter/ecs.py +241 -0
- scaler/worker_adapter/native.py +138 -0
- scaler/worker_adapter/symphony/__init__.py +0 -0
- scaler/worker_adapter/symphony/callback.py +45 -0
- scaler/worker_adapter/symphony/heartbeat_manager.py +82 -0
- scaler/worker_adapter/symphony/message.py +24 -0
- scaler/worker_adapter/symphony/task_manager.py +289 -0
- scaler/worker_adapter/symphony/worker.py +204 -0
- scaler/worker_adapter/symphony/worker_adapter.py +123 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
from typing import Any, Dict, Hashable, Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclasses.dataclass
class _Node:
    """A single link of the doubly-linked list backing IndexedQueue."""

    # the queued item itself
    value: Any
    # neighbour toward the head (None when this node is the head)
    prev: Optional["_Node"] = None
    # neighbour toward the tail (None when this node is the tail)
    next: Optional["_Node"] = None
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class _DoubleLinkedList:
    """Doubly-linked list with O(1) head insertion and O(1) arbitrary removal.

    Nodes are created and owned by the caller (see IndexedQueue); this class
    only rewires their ``prev``/``next`` pointers.  ``next`` points from the
    head toward the tail, ``prev`` points back toward the head.

    Changes vs. the original: detached nodes now get their ``prev``/``next``
    cleared so they no longer reference live list nodes (avoids keeping
    removed chains alive via a retained node), and the no-op ``del node`` at
    the end of ``remove`` (it only unbound the local name) was dropped.
    Annotations are quoted forward references, consistent with ``_Node``.
    """

    def __init__(self):
        self._head: Optional["_Node"] = None  # most recently inserted node
        self._tail: Optional["_Node"] = None  # oldest node, removed first
        self._size = 0

    def __len__(self):
        return self._size

    def add_to_head(self, node: "_Node"):
        """Insert ``node`` at the head of the list."""
        if self._head is None:
            # empty list: node becomes both head and tail
            self._head = node
            self._tail = node
        else:
            node.next = self._head
            self._head.prev = node
            self._head = node

        self._size += 1

    def remove_tail(self) -> "_Node":
        """Detach and return the tail node; raise IndexError when empty."""
        if self._tail is None:
            raise IndexError(f"{self.__class__.__name__} queue empty")

        node = self._tail
        if self._tail.prev is None:
            # removing the only node empties the list
            self._head = None
            self._tail = None
        else:
            self._tail = self._tail.prev
            self._tail.next = None

        # clear the detached node's links so it cannot keep live nodes alive
        node.prev = None
        node.next = None
        self._size -= 1
        return node

    def remove(self, node: "_Node"):
        """Unlink ``node`` from anywhere in the list in O(1).

        ``node`` must currently be a member of this list (checked only by
        assertions on the head/tail cases).
        """
        prev_node = node.prev
        next_node = node.next

        if prev_node and next_node:
            # interior node: bridge its two neighbours
            prev_node.next = next_node
            next_node.prev = prev_node

        elif not prev_node and not next_node:
            # sole node of the list
            assert self._head is node
            assert self._tail is node
            self._head = None
            self._tail = None

        elif prev_node and not next_node:
            # node is the tail
            assert self._tail is node
            prev_node.next = None
            self._tail = prev_node

        elif not prev_node and next_node:
            # node is the head
            assert self._head is node
            next_node.prev = None
            self._head = next_node

        # clear the detached node's links so it cannot keep live nodes alive
        node.prev = None
        node.next = None
        self._size -= 1
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class IndexedQueue:
    """A FIFO queue that provides O(1) operations for adding and removing any item.

    Items must be hashable.  Each queued item is wrapped in a ``_Node`` and
    indexed in a dict so that ``remove`` can unlink it in O(1).  Duplicate
    (equal) items are rejected by ``put``.

    Bug fix vs. the original: the index was keyed by ``hash(item)`` rather
    than the item itself.  Distinct, unequal items can share a hash value
    (e.g. ``hash(-1) == hash(-2) == -2`` in CPython), which made ``put``
    raise a spurious KeyError and made ``remove`` unlink the wrong node.
    Keying by the item itself preserves the semantics for equal items while
    eliminating collisions.
    """

    def __init__(self):
        self._double_linked_list = _DoubleLinkedList()
        # maps each queued item to its linked-list node; keyed by the item
        # itself so unequal items with equal hashes cannot collide
        self._hash_map: Dict[Hashable, _Node] = {}

    def __contains__(self, item: Hashable):
        return item in self._hash_map

    def __len__(self):
        return len(self._double_linked_list)

    def __iter__(self):
        # walk from the tail (oldest) toward the head: FIFO order
        node = self._double_linked_list._tail
        while node is not None:
            yield node.value
            node = node.prev

    def put(self, item: Hashable):
        """Enqueue ``item``; raise KeyError if an equal item is already queued."""
        if item in self._hash_map:
            raise KeyError(f"{self.__class__.__name__} already have item: {item}")

        node = _Node(item)
        self._double_linked_list.add_to_head(node)
        self._hash_map[item] = node

    def get(self):
        """Dequeue and return the oldest item; raise IndexError when empty."""
        node = self._double_linked_list.remove_tail()
        del self._hash_map[node.value]
        return node.value

    def remove(self, item: Hashable):
        """Remove ``item`` from anywhere in the queue; raise ValueError if absent."""
        if item not in self._hash_map:
            raise ValueError(f"{self.__class__.__name__} doesn't have item: {item}")

        node = self._hash_map.pop(item)
        self._double_linked_list.remove(node)
|
scaler/version.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1.12.37
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from typing import Dict, Optional
|
|
3
|
+
|
|
4
|
+
import psutil
|
|
5
|
+
|
|
6
|
+
from scaler.config.types.object_storage_server import ObjectStorageAddressConfig
|
|
7
|
+
from scaler.io.mixins import AsyncConnector, AsyncObjectStorageConnector
|
|
8
|
+
from scaler.protocol.python.message import Resource, WorkerHeartbeat, WorkerHeartbeatEcho
|
|
9
|
+
from scaler.protocol.python.status import ProcessorStatus
|
|
10
|
+
from scaler.utility.mixins import Looper
|
|
11
|
+
from scaler.worker.agent.mixins import HeartbeatManager, ProcessorManager, TaskManager, TimeoutManager
|
|
12
|
+
from scaler.worker.agent.processor_holder import ProcessorHolder
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class VanillaHeartbeatManager(Looper, HeartbeatManager):
    """Sends worker heartbeats to the scheduler and handles the echoes.

    Also reaps zombie/dead processor processes each cycle and, when no object
    storage address was configured up front, adopts the one advertised in the
    first heartbeat echo and connects the storage connector to it.
    """

    def __init__(
        self,
        object_storage_address: Optional[ObjectStorageAddressConfig],
        capabilities: Dict[str, int],
        task_queue_size: int,
    ):
        # this agent's own process handle, used for CPU/RSS stats in heartbeats
        self._agent_process = psutil.Process()
        self._capabilities = capabilities
        self._task_queue_size = task_queue_size

        # collaborators injected later via register()
        self._connector_external: Optional[AsyncConnector] = None
        self._connector_storage: Optional[AsyncObjectStorageConnector] = None
        self._worker_task_manager: Optional[TaskManager] = None
        self._timeout_manager: Optional[TimeoutManager] = None
        self._processor_manager: Optional[ProcessorManager] = None

        # non-zero exactly while a heartbeat is in flight awaiting its echo
        self._start_timestamp_ns = 0
        # last measured one-way latency estimate, in microseconds
        self._latency_us = 0

        # may be None; then learned from the first heartbeat echo
        self._object_storage_address: Optional[ObjectStorageAddressConfig] = object_storage_address

    def register(
        self,
        connector_external: AsyncConnector,
        connector_storage: AsyncObjectStorageConnector,
        worker_task_manager: TaskManager,
        timeout_manager: TimeoutManager,
        processor_manager: ProcessorManager,
    ):
        """Inject the collaborators this manager depends on (called once at startup)."""
        self._connector_external = connector_external
        self._connector_storage = connector_storage
        self._worker_task_manager = worker_task_manager
        self._timeout_manager = timeout_manager
        self._processor_manager = processor_manager

    async def on_heartbeat_echo(self, heartbeat: WorkerHeartbeatEcho):
        """Handle the scheduler's echo of our last heartbeat."""
        if self._start_timestamp_ns == 0:
            # not handling echo if we didn't send out heartbeat
            return

        # one-way latency estimate: half the measured round trip, in microseconds
        self._latency_us = int(((time.time_ns() - self._start_timestamp_ns) / 2) // 1_000)
        # clearing the sentinel lets routine() send the next heartbeat
        self._start_timestamp_ns = 0
        self._timeout_manager.update_last_seen_time()

        if self._object_storage_address is None:
            # no address was configured: adopt the scheduler-advertised one and connect
            address_message = heartbeat.object_storage_address()
            self._object_storage_address = ObjectStorageAddressConfig(address_message.host, address_message.port)
            await self._connector_storage.connect(self._object_storage_address.host, self._object_storage_address.port)

    async def routine(self):
        """Periodic loop body: reap failed processors, then send one heartbeat."""
        processors = self._processor_manager.processors()

        if self._start_timestamp_ns != 0:
            # already sent heartbeat, expecting heartbeat echo, so not sending
            return

        for processor_holder in processors:
            status = processor_holder.process().status()
            if status in {psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD}:
                await self._processor_manager.on_failing_processor(processor_holder.processor_id(), status)

        processors = self._processor_manager.processors()  # refreshes for removed dead and zombie processors
        num_suspended_processors = self._processor_manager.num_suspended_processors()

        # TODO: add task queue size to WorkerHeartbeat
        await self._connector_external.send(
            WorkerHeartbeat.new_msg(
                # NOTE(review): cpu_percent()*10 presumably encodes tenths of a percent — confirm against Resource schema
                Resource.new_msg(int(self._agent_process.cpu_percent() * 10), self._agent_process.memory_info().rss),
                psutil.virtual_memory().available,
                self._task_queue_size,
                self._worker_task_manager.get_queued_size() - num_suspended_processors,
                self._latency_us,
                self._processor_manager.can_accept_task(),
                [self.__get_processor_status_from_holder(processor) for processor in processors],
                self._capabilities,
            )
        )
        # mark a heartbeat in flight; cleared again by on_heartbeat_echo()
        self._start_timestamp_ns = time.time_ns()

    def get_object_storage_address(self) -> Optional[ObjectStorageAddressConfig]:
        """Return the object storage address (None until configured or learned from an echo)."""
        return self._object_storage_address

    @staticmethod
    def __get_processor_status_from_holder(processor: ProcessorHolder) -> ProcessorStatus:
        """Build a ProcessorStatus snapshot for one processor's OS process."""
        process = processor.process()

        try:
            resource = Resource.new_msg(int(process.cpu_percent() * 10), process.memory_info().rss)
        except psutil.ZombieProcess:
            # Assumes dead processes do not use any resources
            resource = Resource.new_msg(0, 0)

        return ProcessorStatus.new_msg(
            processor.pid(), processor.initialized(), processor.task() is not None, processor.suspended(), resource
        )
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from scaler.config.types.object_storage_server import ObjectStorageAddressConfig
|
|
5
|
+
from scaler.protocol.python.message import (
|
|
6
|
+
ObjectInstruction,
|
|
7
|
+
ProcessorInitialized,
|
|
8
|
+
Task,
|
|
9
|
+
TaskCancel,
|
|
10
|
+
TaskResult,
|
|
11
|
+
WorkerHeartbeatEcho,
|
|
12
|
+
)
|
|
13
|
+
from scaler.utility.identifiers import ProcessorID, TaskID
|
|
14
|
+
from scaler.utility.metadata.profile_result import ProfileResult
|
|
15
|
+
from scaler.worker.agent.processor_holder import ProcessorHolder
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class HeartbeatManager(metaclass=abc.ABCMeta):
    """Interface for the worker-agent component that exchanges heartbeats with the scheduler."""

    @abc.abstractmethod
    async def on_heartbeat_echo(self, heartbeat: WorkerHeartbeatEcho):
        """Handle the scheduler's echo of a previously sent heartbeat."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_object_storage_address(self) -> Optional[ObjectStorageAddressConfig]:
        """Return the object storage address, or None while it is not yet known.

        NOTE(review): annotated Optional to match VanillaHeartbeatManager,
        which returns None until an address is configured or learned.
        """
        raise NotImplementedError()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TimeoutManager(metaclass=abc.ABCMeta):
    """Interface for tracking when the remote peer was last seen alive."""

    @abc.abstractmethod
    def update_last_seen_time(self):
        """Record that the remote peer has just been heard from."""
        raise NotImplementedError()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class TaskManager(metaclass=abc.ABCMeta):
    """Interface for the worker agent's task queueing component."""

    @abc.abstractmethod
    async def on_task_new(self, task: Task):
        """Handle a new task assigned to this worker."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_result(self, result: TaskResult):
        """Handle the result of a completed task."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_cancel_task(self, task_cancel: TaskCancel):
        """Handle a request to cancel a task."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_queued_size(self):
        """Return the number of tasks currently queued."""
        raise NotImplementedError()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ProcessorManager(metaclass=abc.ABCMeta):
    """Interface for managing processor child processes and routing work to them."""

    @abc.abstractmethod
    def can_accept_task(self) -> bool:
        """Return whether a new task can currently be accepted."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def wait_until_can_accept_task(self):
        """Block until a new task can be accepted."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_processor_initialized(self, processor_id: ProcessorID, processor_initialized: ProcessorInitialized):
        """Handle a processor reporting that it has finished initializing."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task(self, task: Task) -> bool:
        """Hand a task to a processor; returns a success flag."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_cancel_task(self, task_id: TaskID) -> Optional[Task]:
        """Cancel the task; presumably returns the canceled Task or None if not found — see implementations."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_failing_processor(self, processor_id: ProcessorID, process_status: str):
        """Handle a processor whose OS process failed (e.g. psutil zombie/dead status)."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_suspend_task(self, task_id: TaskID) -> bool:
        """Suspend execution of the given task; returns a success flag."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_resume_task(self, task_id: TaskID) -> bool:
        """Resume a previously suspended task; returns a success flag."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_result(self, processor_id: ProcessorID, task_result: TaskResult):
        """Handle a task result reported by one of our processors."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_external_object_instruction(self, instruction: ObjectInstruction):
        """Handle an object instruction arriving from outside the worker."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_internal_object_instruction(self, processor_id: ProcessorID, instruction: ObjectInstruction):
        """Handle an object instruction originating from one of our processors."""
        raise NotImplementedError()

    @abc.abstractmethod
    def destroy(self, reason: str):
        """Tear down all managed processors, citing the given reason."""
        raise NotImplementedError()

    @abc.abstractmethod
    def current_processor_is_initialized(self) -> bool:
        """Return whether the current processor has completed initialization."""
        raise NotImplementedError()

    @abc.abstractmethod
    def current_task(self) -> Optional[Task]:
        """Return the task currently being executed, or None."""
        raise NotImplementedError()

    @abc.abstractmethod
    def current_task_id(self) -> TaskID:
        """Return the id of the task currently being executed."""
        raise NotImplementedError()

    @abc.abstractmethod
    def processors(self) -> List[ProcessorHolder]:
        """Return holders for all managed processor processes."""
        raise NotImplementedError()

    @abc.abstractmethod
    def num_suspended_processors(self) -> int:
        """Return how many processors are currently suspended."""
        raise NotImplementedError()
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class ProfilingManager(metaclass=abc.ABCMeta):
    """Interface for collecting per-process, per-task profiling data."""

    @abc.abstractmethod
    def on_process_start(self, pid: int):
        """Begin tracking the process with this pid."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_process_end(self, pid: int):
        """Stop tracking the process with this pid."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_task_start(self, pid: int, task_id: TaskID):
        """Mark the start of a task on the given process."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_task_end(self, pid: int, task_id: TaskID) -> ProfileResult:
        """Mark the end of a task and return its collected profile."""
        raise NotImplementedError()
|
|
File without changes
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import ctypes
|
|
2
|
+
import gc
|
|
3
|
+
import logging
|
|
4
|
+
import multiprocessing
|
|
5
|
+
import platform
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
9
|
+
|
|
10
|
+
import cloudpickle
|
|
11
|
+
import psutil
|
|
12
|
+
|
|
13
|
+
from scaler.client.serializer.mixins import Serializer
|
|
14
|
+
from scaler.config.defaults import CLEANUP_INTERVAL_SECONDS
|
|
15
|
+
from scaler.utility.exceptions import DeserializeObjectError
|
|
16
|
+
from scaler.utility.identifiers import ClientID, ObjectID
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ObjectCache(threading.Thread):
    """In-process cache of deserialized objects and per-client serializers.

    Runs as a background thread that periodically garbage-collects and, when
    resident memory exceeds ``trim_memory_threshold_bytes``, asks libc to
    return freed pages to the OS via ``malloc_trim``.

    Bug fix vs. the original ``add_object``: a deserialization failure was
    logged but control fell through to the caching statement with
    ``deserialized`` unbound, raising NameError; it now returns after logging
    (best-effort: log and skip caching).  Project-type annotations are quoted
    forward references; behavior is otherwise unchanged.
    """

    def __init__(self, garbage_collect_interval_seconds: int, trim_memory_threshold_bytes: int):
        threading.Thread.__init__(self)

        # serializer registered by each client, used to decode that client's payloads
        self._serializers: Dict["ClientID", "Serializer"] = dict()

        self._garbage_collect_interval_seconds = garbage_collect_interval_seconds
        self._previous_garbage_collect_time = time.time()
        self._trim_memory_threshold_bytes = trim_memory_threshold_bytes

        self._cached_objects: Dict["ObjectID", Any] = {}
        # last-access timestamps, refreshed whenever an object is added or read
        self._cached_objects_alive_since: Dict["ObjectID", float] = dict()
        self._process = psutil.Process(multiprocessing.current_process().pid)
        # NOTE(review): malloc_trim exists in glibc/musl but not in the Darwin
        # "dylib" branch, which would raise on the malloc_trim call —
        # presumably this code path never runs on macOS; confirm
        self._libc = ctypes.cdll.LoadLibrary("libc.{}".format("so.6" if platform.uname()[0] != "Darwin" else "dylib"))

        self._stop_event = threading.Event()

    def run(self) -> None:
        """Thread body: wake every CLEANUP_INTERVAL_SECONDS until destroy() is called."""
        try:
            while not self._stop_event.wait(timeout=CLEANUP_INTERVAL_SECONDS):
                self.__clean_memory()
        finally:
            self.__clear()  # gracefully destroy all cached objects

    def destroy(self) -> None:
        """Signal the cleanup thread to stop (it clears the cache on exit)."""
        self._stop_event.set()

    def add_serializer(self, client: "ClientID", serializer: "Serializer"):
        """Register the serializer to use for this client's objects."""
        self._serializers[client] = serializer

    def serialize(self, client: "ClientID", obj: Any) -> bytes:
        """Serialize ``obj`` with the client's registered serializer."""
        return self.get_serializer(client).serialize(obj)

    def deserialize(self, client: "ClientID", payload: bytes) -> Any:
        """Deserialize ``payload`` with the client's registered serializer."""
        return self.get_serializer(client).deserialize(payload)

    def add_object(self, client: "ClientID", object_id: "ObjectID", object_bytes: bytes) -> None:
        """Deserialize ``object_bytes`` and cache it under ``object_id``.

        Serializer payloads register the client's serializer instead of being
        cached.  Deserialization failures are logged and the object is
        skipped.
        """
        if object_id.is_serializer():
            self.add_serializer(client, cloudpickle.loads(object_bytes))
        else:
            try:
                deserialized = self.deserialize(client, object_bytes)
            except Exception:  # noqa
                logging.exception(f"failed to deserialize received {object_id!r}, length={len(object_bytes)}")
                # bug fix: without this return, `deserialized` was unbound
                # below and the original code raised NameError after logging
                return

            self._cached_objects[object_id] = deserialized
            self._cached_objects_alive_since[object_id] = time.time()

    def del_object(self, object_id: "ObjectID"):
        """Drop the object from the cache; no-op if absent."""
        self._cached_objects_alive_since.pop(object_id, None)
        self._cached_objects.pop(object_id, None)

    def has_object(self, object_id: "ObjectID"):
        """Return whether the id refers to a cached object or a registered serializer."""
        return object_id in self._cached_objects or object_id in self._serializers

    def get_object(self, object_id: "ObjectID") -> Optional[Any]:
        """Return the cached object, refreshing its last-access time.

        Raises ValueError if the object is not cached.
        """
        if object_id not in self._cached_objects:
            raise ValueError(f"cannot get object for {object_id!r}")

        obj = self._cached_objects[object_id]

        self._cached_objects_alive_since[object_id] = time.time()
        return obj

    def get_serializer(self, client: "ClientID") -> "Serializer":
        """Return the client's serializer; raise DeserializeObjectError if none registered."""
        serializer = self._serializers.get(client)

        if serializer is None:
            raise DeserializeObjectError(f"cannot get serializer for {client!r}")

        return serializer

    def __clean_memory(self):
        """Run gc on the configured interval; trim the malloc heap when rss exceeds the threshold."""
        if time.time() - self._previous_garbage_collect_time < self._garbage_collect_interval_seconds:
            return

        self._previous_garbage_collect_time = time.time()

        gc.collect()

        if self._process.memory_info().rss < self._trim_memory_threshold_bytes:
            return

        self._libc.malloc_trim(0)

    def __clear(self) -> None:
        """Drop every cached object (called when the thread exits)."""
        self._cached_objects.clear()
        self._cached_objects_alive_since.clear()
|