opengris-scaler 1.12.28__cp313-cp313-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of opengris-scaler might be problematic. Click here for more details.
- opengris_scaler-1.12.28.dist-info/METADATA +728 -0
- opengris_scaler-1.12.28.dist-info/RECORD +187 -0
- opengris_scaler-1.12.28.dist-info/WHEEL +5 -0
- opengris_scaler-1.12.28.dist-info/entry_points.txt +10 -0
- opengris_scaler-1.12.28.dist-info/licenses/LICENSE +201 -0
- opengris_scaler-1.12.28.dist-info/licenses/LICENSE.spdx +7 -0
- opengris_scaler-1.12.28.dist-info/licenses/NOTICE +8 -0
- opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
- opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
- opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
- opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
- scaler/__init__.py +14 -0
- scaler/about.py +5 -0
- scaler/client/__init__.py +0 -0
- scaler/client/agent/__init__.py +0 -0
- scaler/client/agent/client_agent.py +210 -0
- scaler/client/agent/disconnect_manager.py +27 -0
- scaler/client/agent/future_manager.py +112 -0
- scaler/client/agent/heartbeat_manager.py +74 -0
- scaler/client/agent/mixins.py +89 -0
- scaler/client/agent/object_manager.py +98 -0
- scaler/client/agent/task_manager.py +64 -0
- scaler/client/client.py +658 -0
- scaler/client/future.py +252 -0
- scaler/client/object_buffer.py +129 -0
- scaler/client/object_reference.py +25 -0
- scaler/client/serializer/__init__.py +0 -0
- scaler/client/serializer/default.py +16 -0
- scaler/client/serializer/mixins.py +38 -0
- scaler/cluster/__init__.py +0 -0
- scaler/cluster/cluster.py +115 -0
- scaler/cluster/combo.py +150 -0
- scaler/cluster/object_storage_server.py +45 -0
- scaler/cluster/scheduler.py +86 -0
- scaler/config/__init__.py +0 -0
- scaler/config/defaults.py +94 -0
- scaler/config/loader.py +96 -0
- scaler/config/mixins.py +20 -0
- scaler/config/section/__init__.py +0 -0
- scaler/config/section/cluster.py +55 -0
- scaler/config/section/ecs_worker_adapter.py +85 -0
- scaler/config/section/native_worker_adapter.py +43 -0
- scaler/config/section/object_storage_server.py +8 -0
- scaler/config/section/scheduler.py +54 -0
- scaler/config/section/symphony_worker_adapter.py +47 -0
- scaler/config/section/top.py +13 -0
- scaler/config/section/webui.py +21 -0
- scaler/config/types/__init__.py +0 -0
- scaler/config/types/network_backend.py +12 -0
- scaler/config/types/object_storage_server.py +45 -0
- scaler/config/types/worker.py +62 -0
- scaler/config/types/zmq.py +83 -0
- scaler/entry_points/__init__.py +0 -0
- scaler/entry_points/cluster.py +133 -0
- scaler/entry_points/object_storage_server.py +45 -0
- scaler/entry_points/scheduler.py +144 -0
- scaler/entry_points/top.py +286 -0
- scaler/entry_points/webui.py +48 -0
- scaler/entry_points/worker_adapter_ecs.py +191 -0
- scaler/entry_points/worker_adapter_native.py +137 -0
- scaler/entry_points/worker_adapter_symphony.py +98 -0
- scaler/io/__init__.py +0 -0
- scaler/io/async_binder.py +89 -0
- scaler/io/async_connector.py +95 -0
- scaler/io/async_object_storage_connector.py +225 -0
- scaler/io/mixins.py +154 -0
- scaler/io/sync_connector.py +68 -0
- scaler/io/sync_object_storage_connector.py +247 -0
- scaler/io/sync_subscriber.py +83 -0
- scaler/io/utility.py +80 -0
- scaler/io/ymq/__init__.py +0 -0
- scaler/io/ymq/_ymq.pyi +95 -0
- scaler/io/ymq/ymq.py +138 -0
- scaler/io/ymq_async_object_storage_connector.py +184 -0
- scaler/io/ymq_sync_object_storage_connector.py +184 -0
- scaler/object_storage/__init__.py +0 -0
- scaler/protocol/__init__.py +0 -0
- scaler/protocol/capnp/__init__.py +0 -0
- scaler/protocol/capnp/_python.py +6 -0
- scaler/protocol/capnp/common.capnp +68 -0
- scaler/protocol/capnp/message.capnp +218 -0
- scaler/protocol/capnp/object_storage.capnp +57 -0
- scaler/protocol/capnp/status.capnp +73 -0
- scaler/protocol/introduction.md +105 -0
- scaler/protocol/python/__init__.py +0 -0
- scaler/protocol/python/common.py +140 -0
- scaler/protocol/python/message.py +751 -0
- scaler/protocol/python/mixins.py +13 -0
- scaler/protocol/python/object_storage.py +118 -0
- scaler/protocol/python/status.py +279 -0
- scaler/protocol/worker.md +228 -0
- scaler/scheduler/__init__.py +0 -0
- scaler/scheduler/allocate_policy/__init__.py +0 -0
- scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
- scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
- scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
- scaler/scheduler/allocate_policy/mixins.py +55 -0
- scaler/scheduler/controllers/__init__.py +0 -0
- scaler/scheduler/controllers/balance_controller.py +65 -0
- scaler/scheduler/controllers/client_controller.py +131 -0
- scaler/scheduler/controllers/config_controller.py +31 -0
- scaler/scheduler/controllers/graph_controller.py +424 -0
- scaler/scheduler/controllers/information_controller.py +81 -0
- scaler/scheduler/controllers/mixins.py +194 -0
- scaler/scheduler/controllers/object_controller.py +147 -0
- scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
- scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
- scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
- scaler/scheduler/controllers/scaling_policies/null.py +14 -0
- scaler/scheduler/controllers/scaling_policies/types.py +9 -0
- scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
- scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
- scaler/scheduler/controllers/task_controller.py +376 -0
- scaler/scheduler/controllers/worker_controller.py +169 -0
- scaler/scheduler/object_usage/__init__.py +0 -0
- scaler/scheduler/object_usage/object_tracker.py +131 -0
- scaler/scheduler/scheduler.py +251 -0
- scaler/scheduler/task/__init__.py +0 -0
- scaler/scheduler/task/task_state_machine.py +92 -0
- scaler/scheduler/task/task_state_manager.py +61 -0
- scaler/ui/__init__.py +0 -0
- scaler/ui/constants.py +9 -0
- scaler/ui/live_display.py +147 -0
- scaler/ui/memory_window.py +146 -0
- scaler/ui/setting_page.py +40 -0
- scaler/ui/task_graph.py +832 -0
- scaler/ui/task_log.py +107 -0
- scaler/ui/utility.py +66 -0
- scaler/ui/webui.py +147 -0
- scaler/ui/worker_processors.py +104 -0
- scaler/utility/__init__.py +0 -0
- scaler/utility/debug.py +19 -0
- scaler/utility/event_list.py +63 -0
- scaler/utility/event_loop.py +58 -0
- scaler/utility/exceptions.py +42 -0
- scaler/utility/formatter.py +44 -0
- scaler/utility/graph/__init__.py +0 -0
- scaler/utility/graph/optimization.py +27 -0
- scaler/utility/graph/topological_sorter.py +11 -0
- scaler/utility/graph/topological_sorter_graphblas.py +174 -0
- scaler/utility/identifiers.py +107 -0
- scaler/utility/logging/__init__.py +0 -0
- scaler/utility/logging/decorators.py +25 -0
- scaler/utility/logging/scoped_logger.py +33 -0
- scaler/utility/logging/utility.py +183 -0
- scaler/utility/many_to_many_dict.py +123 -0
- scaler/utility/metadata/__init__.py +0 -0
- scaler/utility/metadata/profile_result.py +31 -0
- scaler/utility/metadata/task_flags.py +30 -0
- scaler/utility/mixins.py +13 -0
- scaler/utility/network_util.py +7 -0
- scaler/utility/one_to_many_dict.py +72 -0
- scaler/utility/queues/__init__.py +0 -0
- scaler/utility/queues/async_indexed_queue.py +37 -0
- scaler/utility/queues/async_priority_queue.py +70 -0
- scaler/utility/queues/async_sorted_priority_queue.py +45 -0
- scaler/utility/queues/indexed_queue.py +114 -0
- scaler/utility/serialization.py +9 -0
- scaler/version.txt +1 -0
- scaler/worker/__init__.py +0 -0
- scaler/worker/agent/__init__.py +0 -0
- scaler/worker/agent/heartbeat_manager.py +107 -0
- scaler/worker/agent/mixins.py +137 -0
- scaler/worker/agent/processor/__init__.py +0 -0
- scaler/worker/agent/processor/object_cache.py +107 -0
- scaler/worker/agent/processor/processor.py +285 -0
- scaler/worker/agent/processor/streaming_buffer.py +28 -0
- scaler/worker/agent/processor_holder.py +147 -0
- scaler/worker/agent/processor_manager.py +369 -0
- scaler/worker/agent/profiling_manager.py +109 -0
- scaler/worker/agent/task_manager.py +150 -0
- scaler/worker/agent/timeout_manager.py +19 -0
- scaler/worker/preload.py +84 -0
- scaler/worker/worker.py +265 -0
- scaler/worker_adapter/__init__.py +0 -0
- scaler/worker_adapter/common.py +26 -0
- scaler/worker_adapter/ecs.py +269 -0
- scaler/worker_adapter/native.py +155 -0
- scaler/worker_adapter/symphony/__init__.py +0 -0
- scaler/worker_adapter/symphony/callback.py +45 -0
- scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
- scaler/worker_adapter/symphony/message.py +24 -0
- scaler/worker_adapter/symphony/task_manager.py +289 -0
- scaler/worker_adapter/symphony/worker.py +204 -0
- scaler/worker_adapter/symphony/worker_adapter.py +139 -0
- src/scaler/io/ymq/_ymq.so +0 -0
- src/scaler/object_storage/object_storage_server.so +0 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import struct
|
|
3
|
+
|
|
4
|
+
from scaler.protocol.python.message import Task
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclasses.dataclass
class TaskFlags:
    """Per-task option flags carried inside ``Task.metadata`` as a fixed-size packed struct."""

    profiling: bool = True
    priority: int = 0
    stream_output: bool = False

    # network byte order: bool (profiling), int32 (priority), bool (stream_output)
    FORMAT = "!?i?"

    def serialize(self) -> bytes:
        """Pack the flags into the fixed-size wire format defined by FORMAT."""
        return struct.pack(TaskFlags.FORMAT, self.profiling, self.priority, self.stream_output)

    @staticmethod
    def deserialize(data: bytes) -> "TaskFlags":
        """Reconstruct a TaskFlags from bytes produced by serialize().

        Raises:
            struct.error: if ``data`` does not match FORMAT.
        """
        return TaskFlags(*struct.unpack(TaskFlags.FORMAT, data))


def retrieve_task_flags_from_task(task: "Task") -> TaskFlags:
    """Decode the TaskFlags stored in ``task.metadata``.

    Empty metadata yields the default flags.

    Raises:
        ValueError: if the metadata is not a valid packed TaskFlags payload;
            the original ``struct.error`` is chained as the cause.
    """
    if task.metadata == b"":
        return TaskFlags()

    try:
        return TaskFlags.deserialize(task.metadata)
    except struct.error as e:
        # chain the low-level struct failure so the root cause stays visible
        raise ValueError(f"unexpected metadata value (expected {TaskFlags.__name__}).") from e
|
scaler/utility/mixins.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Looper(metaclass=abc.ABCMeta):
    """Interface for components driven by a periodically invoked async routine."""

    @abc.abstractmethod
    async def routine(self):
        """Run a single iteration of the component's periodic work."""
        raise NotImplementedError()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Reporter(metaclass=abc.ABCMeta):
    """Interface for components that can report a status snapshot."""

    @abc.abstractmethod
    def get_status(self):
        """Return the component's current status."""
        raise NotImplementedError()
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from typing import Dict, Generic, Set, TypeVar
|
|
2
|
+
|
|
3
|
+
KeyT = TypeVar("KeyT")
ValueT = TypeVar("ValueT")


class OneToManyDict(Generic[KeyT, ValueT]):
    """Bidirectional mapping in which each key owns a set of values and every
    value belongs to exactly one key."""

    def __init__(self):
        # key -> set of values owned by that key
        self._key_to_value_set: Dict[KeyT, Set[ValueT]] = dict()
        # value -> the single key that owns it
        self._value_to_key: Dict[ValueT, KeyT] = dict()

    def __contains__(self, key) -> bool:
        return self.has_key(key)

    def keys(self):
        """Return a view over all keys."""
        return self._key_to_value_set.keys()

    def values(self):
        """Return a view over the per-key value sets."""
        return self._key_to_value_set.values()

    def items(self):
        """Return a view of (key, value set) pairs."""
        return self._key_to_value_set.items()

    def add(self, key: KeyT, value: ValueT):
        """Attach *value* to *key*; a value may only ever belong to one key."""
        if value in self._value_to_key and self._value_to_key[value] != key:
            raise ValueError("value has to be unique in OneToManyDict")

        self._value_to_key[value] = key
        self._key_to_value_set.setdefault(key, set()).add(value)

    def has_key(self, key: KeyT) -> bool:
        return key in self._key_to_value_set

    def has_value(self, value: ValueT) -> bool:
        return value in self._value_to_key

    def get_key(self, value: ValueT) -> KeyT:
        """Return the key that owns *value*."""
        if not self.has_value(value):
            raise ValueError(f"cannot find {value=} in OneToManyDict")

        return self._value_to_key[value]

    def get_values(self, key: KeyT) -> Set[ValueT]:
        """Return the set of values owned by *key*."""
        if not self.has_key(key):
            raise ValueError(f"cannot find {key=} in OneToManyDict")

        return self._key_to_value_set[key]

    def remove_key(self, key: KeyT) -> Set[ValueT]:
        """Delete *key* and detach all of its values, returning them."""
        if not self.has_key(key):
            raise KeyError(f"cannot find {key=} in OneToManyDict")

        removed_values = self._key_to_value_set.pop(key)
        for removed in removed_values:
            self._value_to_key.pop(removed)

        return removed_values

    def remove_value(self, value: ValueT) -> KeyT:
        """Detach *value* from its key and return that key; keys left empty are dropped."""
        if not self.has_value(value):
            raise ValueError(f"cannot find {value=} in OneToManyDict")

        owning_key = self._value_to_key.pop(value)
        remaining = self._key_to_value_set[owning_key]
        remaining.remove(value)
        if not remaining:
            self._key_to_value_set.pop(owning_key)

        return owning_key
|
|
File without changes
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from asyncio import Queue, QueueEmpty
|
|
2
|
+
from typing import Generic, TypeVar
|
|
3
|
+
|
|
4
|
+
from scaler.utility.queues.indexed_queue import IndexedQueue
|
|
5
|
+
|
|
6
|
+
ItemType = TypeVar("ItemType")


class AsyncIndexedQueue(Queue, Generic[ItemType]):
    """An asyncio.Queue variant backed by an IndexedQueue.

    Behaves exactly like a regular async queue, with these extra constraints
    and guarantees:
    - every queued item must be hashable
    - items are unique within the queue
    - put(), get() and remove() all run in O(1)
    """

    def __contains__(self, item: ItemType):
        return self._queue.__contains__(item)

    def __len__(self):
        return len(self._queue)

    def _init(self, maxsize):
        # swap asyncio.Queue's internal deque for the O(1)-removal structure
        self._queue = IndexedQueue()

    def _put(self, item: ItemType):
        self._queue.put(item)

    def _get(self):
        try:
            return self._queue.get()
        except IndexError:
            raise QueueEmpty(f"{self.__class__.__name__} queue empty")

    def remove(self, item: ItemType):
        """Discard *item* from anywhere in the queue in O(1) time."""
        self._queue.remove(item)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import heapq
|
|
2
|
+
import sys
|
|
3
|
+
from asyncio import Queue
|
|
4
|
+
from typing import Any, Dict, List, Tuple, Union
|
|
5
|
+
|
|
6
|
+
PriorityType = Union[int, Tuple["PriorityType", ...]]


class AsyncPriorityQueue(Queue):
    """A subclass of Queue; retrieves entries in priority order (lowest first).

    Entries are typically list of the form: [priority, data]. A priority is an
    int or an arbitrarily nested tuple of ints.
    """

    def __len__(self):
        return len(self._queue)

    def _init(self, maxsize):
        # min-heap of [priority, data] entries, plus data -> entry map for O(1) lookup
        self._queue: List[List] = []
        self._locator: Dict[bytes, List] = {}

    def _put(self, item):
        # entries must be lists so remove()/decrease_priority() can mutate the priority in place
        if not isinstance(item, list):
            item = list(item)

        heapq.heappush(self._queue, item)
        self._locator[item[1]] = item

    def _get(self):
        priority, data = heapq.heappop(self._queue)
        self._locator.pop(data)
        return priority, data

    def remove(self, data):
        # this operation is O(n): lower the entry's priority to the minimum and pop it from
        # the top of the heap. Marking the entry as invalid instead is not a good idea, as
        # invalid entries would never get removed; we use the heapq internal function
        # _siftdown to maintain the min-heap invariant after the in-place mutation.
        item = self._locator.pop(data)
        i = self._queue.index(item)  # O(n)
        item[0] = self.__to_lowest_priority(item[0])
        heapq._siftdown(self._queue, 0, i)  # type: ignore[attr-defined]
        assert heapq.heappop(self._queue) == item

    def decrease_priority(self, data):
        # O(n) because of the list index lookup; uses heapq._siftdown to restore the
        # min-heap invariant after lowering the entry's priority in place
        item = self._locator[data]
        i = self._queue.index(item)  # O(n)
        item[0] = self.__to_lower_priority(item[0])
        heapq._siftdown(self._queue, 0, i)  # type: ignore[attr-defined]

    def max_priority_item(self) -> Tuple[PriorityType, Any]:
        """Return (priority, data) of the front entry without dequeuing it.

        Fixed to peek at the heap root directly: the previous implementation
        popped and re-pushed the root, costing two O(log n) heap operations
        (and transiently mutating the heap) where an O(1) read suffices.
        Raises IndexError on an empty queue, as before.
        """
        item = self._queue[0]
        return item[0], item[1]

    @classmethod
    def __to_lowest_priority(cls, original_priority: PriorityType) -> PriorityType:
        # map every leaf to the smallest possible int so the entry bubbles to the heap root
        if isinstance(original_priority, tuple):
            return tuple(cls.__to_lowest_priority(value) for value in original_priority)
        else:
            return -sys.maxsize - 1

    @classmethod
    def __to_lower_priority(cls, original_priority: PriorityType) -> PriorityType:
        # decrement every leaf by one, recursing through tuple priorities
        if isinstance(original_priority, tuple):
            return tuple(cls.__to_lower_priority(value) for value in original_priority)
        else:
            return original_priority - 1
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from asyncio import Queue
|
|
2
|
+
from typing import Any, Dict
|
|
3
|
+
|
|
4
|
+
from scaler.utility.queues.async_priority_queue import AsyncPriorityQueue
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AsyncSortedPriorityQueue(Queue):
    """An asyncio queue ordered by priority (lowest first), ties broken by insertion order.

    Entries take the form [priority number, data].
    """

    def __len__(self):
        return self._queue.__len__()

    def _init(self, maxsize: int):
        self._queue = AsyncPriorityQueue()

        # A monotonically increasing counter tags every entry, so entries with
        # equal priority come out in the order they went in.
        # See https://docs.python.org/3/library/heapq.html#priority-queue-implementation-notes.
        self._item_counter: int = 0
        self._data_to_item_id: Dict[Any, int] = dict()

    def _put(self, item) -> None:
        priority, data = item

        if data in self._data_to_item_id:
            raise ValueError(f"item `{data}` already in the queue")

        tag = self._item_counter
        self._item_counter = tag + 1

        self._queue._put([priority, (tag, data)])
        self._data_to_item_id[data] = tag

    def _get(self):
        priority, (_, data) = self._queue._get()
        self._data_to_item_id.pop(data)

        return [priority, data]

    def remove(self, data: Any) -> None:
        """Discard the entry carrying *data* from the queue."""
        tag = self._data_to_item_id.pop(data)
        self._queue.remove((tag, data))
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
from typing import Any, Dict, Hashable, Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclasses.dataclass
class _Node:
    """A single cell of the doubly linked list."""

    value: Any
    prev: Optional["_Node"] = None
    next: Optional["_Node"] = None


class _DoubleLinkedList:
    """Minimal doubly linked list with O(1) head insert, tail pop and node unlink."""

    def __init__(self):
        self._head: Optional[_Node] = None
        self._tail: Optional[_Node] = None
        self._size = 0

    def __len__(self):
        return self._size

    def add_to_head(self, node: _Node):
        """Link *node* in front of the current head."""
        if self._head is None:
            self._head = node
            self._tail = node
        else:
            node.next = self._head
            self._head.prev = node
            self._head = node

        self._size += 1

    def remove_tail(self):
        """Unlink and return the tail node; raise IndexError when empty."""
        if self._tail is None:
            raise IndexError(f"{self.__class__.__name__} queue empty")

        node = self._tail
        if self._tail.prev is None:
            # that was the only node
            self._head = None
            self._tail = None
        else:
            self._tail = self._tail.prev
            self._tail.next = None

        self._size -= 1
        return node

    def remove(self, node: _Node):
        """Unlink *node* from anywhere in the list in O(1)."""
        prev_node = node.prev
        next_node = node.next
        if prev_node and next_node:
            # interior node
            prev_node.next = next_node
            next_node.prev = prev_node

        elif not prev_node and not next_node:
            # only node in the list
            assert self._head is node
            assert self._tail is node
            self._head = None
            self._tail = None

        elif prev_node and not next_node:
            # tail node
            assert self._tail is node
            prev_node.next = None
            self._tail = prev_node

        elif not prev_node and next_node:
            # head node
            assert self._head is node
            next_node.prev = None
            self._head = next_node

        self._size -= 1


class IndexedQueue:
    """A FIFO queue that provides O(1) operations for adding and removing any item.

    Items must be hashable and unique within the queue.

    Fix: the index dict used to be keyed by ``hash(item)``, which wrongly
    merged distinct items whose hashes collide (e.g. ``-1`` and ``-2`` in
    CPython). It is now keyed by the item itself, so only items that compare
    equal are treated as duplicates.
    """

    def __init__(self):
        self._double_linked_list = _DoubleLinkedList()
        # maps each queued item to its linked-list node
        self._hash_map: Dict[Hashable, _Node] = {}

    def __contains__(self, item: Hashable):
        return item in self._hash_map

    def __len__(self):
        return self._double_linked_list.__len__()

    def __iter__(self):
        # FIFO order: tail (oldest) towards head (newest)
        node = self._double_linked_list._tail
        while node is not None:
            yield node.value
            node = node.prev

    def put(self, item: Hashable):
        """Enqueue *item*; raise KeyError if an equal item is already queued."""
        if item in self._hash_map:
            raise KeyError(f"{self.__class__.__name__} already have item: {item}")

        node = _Node(item)
        self._double_linked_list.add_to_head(node)
        self._hash_map[item] = node

    def get(self):
        """Dequeue and return the oldest item; raise IndexError when empty."""
        node = self._double_linked_list.remove_tail()
        del self._hash_map[node.value]
        return node.value

    def remove(self, item: Hashable):
        """Remove *item* from anywhere in the queue in O(1); raise ValueError if absent."""
        if item not in self._hash_map:
            raise ValueError(f"{self.__class__.__name__} doesn't have item: {item}")

        node = self._hash_map.pop(item)
        self._double_linked_list.remove(node)
|
scaler/version.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1.12.28
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from typing import Dict, Optional
|
|
3
|
+
|
|
4
|
+
import psutil
|
|
5
|
+
|
|
6
|
+
from scaler.config.types.object_storage_server import ObjectStorageConfig
|
|
7
|
+
from scaler.io.mixins import AsyncConnector, AsyncObjectStorageConnector
|
|
8
|
+
from scaler.protocol.python.message import Resource, WorkerHeartbeat, WorkerHeartbeatEcho
|
|
9
|
+
from scaler.protocol.python.status import ProcessorStatus
|
|
10
|
+
from scaler.utility.mixins import Looper
|
|
11
|
+
from scaler.worker.agent.mixins import HeartbeatManager, ProcessorManager, TaskManager, TimeoutManager
|
|
12
|
+
from scaler.worker.agent.processor_holder import ProcessorHolder
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class VanillaHeartbeatManager(Looper, HeartbeatManager):
    """Exchanges heartbeats with the scheduler on behalf of the worker agent.

    routine() periodically reports agent/processor resource usage; the
    scheduler's echo updates latency and liveness bookkeeping, and (if no
    object storage address was configured) supplies one to connect to.
    """

    def __init__(
        self, object_storage_address: Optional[ObjectStorageConfig], capabilities: Dict[str, int], task_queue_size: int
    ):
        self._agent_process = psutil.Process()
        self._capabilities = capabilities
        self._task_queue_size = task_queue_size

        # collaborators are injected later through register()
        self._connector_external: Optional[AsyncConnector] = None
        self._connector_storage: Optional[AsyncObjectStorageConnector] = None
        self._worker_task_manager: Optional[TaskManager] = None
        self._timeout_manager: Optional[TimeoutManager] = None
        self._processor_manager: Optional[ProcessorManager] = None

        # non-zero while a heartbeat is in flight and we are awaiting its echo
        self._start_timestamp_ns = 0
        self._latency_us = 0

        self._object_storage_address: Optional[ObjectStorageConfig] = object_storage_address

    def register(
        self,
        connector_external: AsyncConnector,
        connector_storage: AsyncObjectStorageConnector,
        worker_task_manager: TaskManager,
        timeout_manager: TimeoutManager,
        processor_manager: ProcessorManager,
    ):
        """Wire in the collaborating connectors and managers."""
        self._connector_external = connector_external
        self._connector_storage = connector_storage
        self._worker_task_manager = worker_task_manager
        self._timeout_manager = timeout_manager
        self._processor_manager = processor_manager

    async def on_heartbeat_echo(self, heartbeat: WorkerHeartbeatEcho):
        """Handle the scheduler's echo: update latency, liveness, and (once) storage address."""
        if self._start_timestamp_ns == 0:
            # not handling echo if we didn't send out heartbeat
            return

        # one-way latency estimate: half the round trip, converted to microseconds
        self._latency_us = int(((time.time_ns() - self._start_timestamp_ns) / 2) // 1_000)
        self._start_timestamp_ns = 0
        self._timeout_manager.update_last_seen_time()

        if self._object_storage_address is None:
            # no address was configured: adopt the one advertised by the scheduler and connect
            address_message = heartbeat.object_storage_address()
            self._object_storage_address = ObjectStorageConfig(address_message.host, address_message.port)
            await self._connector_storage.connect(self._object_storage_address.host, self._object_storage_address.port)

    async def routine(self):
        """Reap dead/zombie processors and send one heartbeat.

        Fix: the early return for an in-flight heartbeat now happens before
        querying the processor manager; previously the processor list was
        fetched and then discarded on every skipped cycle.
        """
        if self._start_timestamp_ns != 0:
            # already sent heartbeat, expecting heartbeat echo, so not sending
            return

        processors = self._processor_manager.processors()

        for processor_holder in processors:
            status = processor_holder.process().status()
            if status in {psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD}:
                await self._processor_manager.on_failing_processor(processor_holder.processor_id(), status)

        processors = self._processor_manager.processors()  # refreshes for removed dead and zombie processors
        num_suspended_processors = self._processor_manager.num_suspended_processors()

        # TODO: add task queue size to WorkerHeartbeat
        await self._connector_external.send(
            WorkerHeartbeat.new_msg(
                Resource.new_msg(int(self._agent_process.cpu_percent() * 10), self._agent_process.memory_info().rss),
                psutil.virtual_memory().available,
                self._task_queue_size,
                self._worker_task_manager.get_queued_size() - num_suspended_processors,
                self._latency_us,
                self._processor_manager.can_accept_task(),
                [self.__get_processor_status_from_holder(processor) for processor in processors],
                self._capabilities,
            )
        )
        self._start_timestamp_ns = time.time_ns()

    def get_object_storage_address(self) -> Optional[ObjectStorageConfig]:
        """Return the object storage address, or None until one is known."""
        return self._object_storage_address

    @staticmethod
    def __get_processor_status_from_holder(processor: ProcessorHolder) -> ProcessorStatus:
        """Build a ProcessorStatus message from a holder, tolerating zombie processes."""
        process = processor.process()

        try:
            resource = Resource.new_msg(int(process.cpu_percent() * 10), process.memory_info().rss)
        except psutil.ZombieProcess:
            # Assumes dead processes do not use any resources
            resource = Resource.new_msg(0, 0)

        return ProcessorStatus.new_msg(
            processor.pid(), processor.initialized(), processor.task() is not None, processor.suspended(), resource
        )
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from scaler.config.types.object_storage_server import ObjectStorageConfig
|
|
5
|
+
from scaler.protocol.python.message import (
|
|
6
|
+
ObjectInstruction,
|
|
7
|
+
ProcessorInitialized,
|
|
8
|
+
Task,
|
|
9
|
+
TaskCancel,
|
|
10
|
+
TaskResult,
|
|
11
|
+
WorkerHeartbeatEcho,
|
|
12
|
+
)
|
|
13
|
+
from scaler.utility.identifiers import ProcessorID, TaskID
|
|
14
|
+
from scaler.utility.metadata.profile_result import ProfileResult
|
|
15
|
+
from scaler.worker.agent.processor_holder import ProcessorHolder
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class HeartbeatManager(metaclass=abc.ABCMeta):
    """Interface for the agent component that exchanges heartbeats with the scheduler."""

    @abc.abstractmethod
    async def on_heartbeat_echo(self, heartbeat: WorkerHeartbeatEcho):
        """Handle a heartbeat echo message received from the scheduler."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_object_storage_address(self) -> ObjectStorageConfig:
        """Return the object storage server address in use."""
        raise NotImplementedError()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TimeoutManager(metaclass=abc.ABCMeta):
    """Interface for tracking liveness of the scheduler connection."""

    @abc.abstractmethod
    def update_last_seen_time(self):
        """Record that the scheduler was just heard from."""
        raise NotImplementedError()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class TaskManager(metaclass=abc.ABCMeta):
    """Interface for the agent component that queues and tracks tasks."""

    @abc.abstractmethod
    async def on_task_new(self, task: Task):
        """Accept a freshly assigned task."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_result(self, result: TaskResult):
        """Handle the result of a finished task."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_cancel_task(self, task_cancel: TaskCancel):
        """Handle a request to cancel a task."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_queued_size(self):
        """Return the number of tasks currently queued."""
        raise NotImplementedError()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ProcessorManager(metaclass=abc.ABCMeta):
    """Interface for the agent component that owns and drives processor processes."""

    @abc.abstractmethod
    def can_accept_task(self) -> bool:
        """Return whether a processor is available to take a task right now."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def wait_until_can_accept_task(self):
        """Block until a processor becomes available for a task."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_processor_initialized(self, processor_id: ProcessorID, processor_initialized: ProcessorInitialized):
        """Handle a processor's initialization notification."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task(self, task: Task) -> bool:
        """Dispatch a task to a processor; return whether it was accepted."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_cancel_task(self, task_id: TaskID) -> Optional[Task]:
        """Cancel a task; return the cancelled task if it was found."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_failing_processor(self, processor_id: ProcessorID, process_status: str):
        """Handle a processor whose process entered a failing state."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_suspend_task(self, task_id: TaskID) -> bool:
        """Suspend a running task; return whether suspension happened."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_resume_task(self, task_id: TaskID) -> bool:
        """Resume a suspended task; return whether resumption happened."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_result(self, processor_id: ProcessorID, task_result: TaskResult):
        """Handle a task result reported by a processor."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_external_object_instruction(self, instruction: ObjectInstruction):
        """Handle an object instruction arriving from outside the worker."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_internal_object_instruction(self, processor_id: ProcessorID, instruction: ObjectInstruction):
        """Handle an object instruction originating from a processor."""
        raise NotImplementedError()

    @abc.abstractmethod
    def destroy(self, reason: str):
        """Tear down all processors, recording the reason."""
        raise NotImplementedError()

    @abc.abstractmethod
    def current_processor_is_initialized(self) -> bool:
        """Return whether the current processor finished initialization."""
        raise NotImplementedError()

    @abc.abstractmethod
    def current_task(self) -> Optional[Task]:
        """Return the task currently being executed, if any."""
        raise NotImplementedError()

    @abc.abstractmethod
    def current_task_id(self) -> TaskID:
        """Return the id of the task currently being executed."""
        raise NotImplementedError()

    @abc.abstractmethod
    def processors(self) -> List[ProcessorHolder]:
        """Return the holders for all managed processors."""
        raise NotImplementedError()

    @abc.abstractmethod
    def num_suspended_processors(self) -> int:
        """Return how many processors are currently suspended."""
        raise NotImplementedError()
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class ProfilingManager(metaclass=abc.ABCMeta):
    """Interface for collecting per-process, per-task profiling information."""

    @abc.abstractmethod
    def on_process_start(self, pid: int):
        """Begin tracking the processor process identified by *pid*."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_process_end(self, pid: int):
        """Stop tracking the processor process identified by *pid*."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_task_start(self, pid: int, task_id: TaskID):
        """Mark the start of *task_id* on process *pid*."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_task_end(self, pid: int, task_id: TaskID) -> ProfileResult:
        """Mark the end of *task_id* on process *pid* and return its profile."""
        raise NotImplementedError()
|
|
File without changes
|