opengris-scaler 1.12.28__cp313-cp313-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of opengris-scaler might be problematic. Click here for more details.

Files changed (187) hide show
  1. opengris_scaler-1.12.28.dist-info/METADATA +728 -0
  2. opengris_scaler-1.12.28.dist-info/RECORD +187 -0
  3. opengris_scaler-1.12.28.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.28.dist-info/entry_points.txt +10 -0
  5. opengris_scaler-1.12.28.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.28.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.28.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/__init__.py +14 -0
  13. scaler/about.py +5 -0
  14. scaler/client/__init__.py +0 -0
  15. scaler/client/agent/__init__.py +0 -0
  16. scaler/client/agent/client_agent.py +210 -0
  17. scaler/client/agent/disconnect_manager.py +27 -0
  18. scaler/client/agent/future_manager.py +112 -0
  19. scaler/client/agent/heartbeat_manager.py +74 -0
  20. scaler/client/agent/mixins.py +89 -0
  21. scaler/client/agent/object_manager.py +98 -0
  22. scaler/client/agent/task_manager.py +64 -0
  23. scaler/client/client.py +658 -0
  24. scaler/client/future.py +252 -0
  25. scaler/client/object_buffer.py +129 -0
  26. scaler/client/object_reference.py +25 -0
  27. scaler/client/serializer/__init__.py +0 -0
  28. scaler/client/serializer/default.py +16 -0
  29. scaler/client/serializer/mixins.py +38 -0
  30. scaler/cluster/__init__.py +0 -0
  31. scaler/cluster/cluster.py +115 -0
  32. scaler/cluster/combo.py +150 -0
  33. scaler/cluster/object_storage_server.py +45 -0
  34. scaler/cluster/scheduler.py +86 -0
  35. scaler/config/__init__.py +0 -0
  36. scaler/config/defaults.py +94 -0
  37. scaler/config/loader.py +96 -0
  38. scaler/config/mixins.py +20 -0
  39. scaler/config/section/__init__.py +0 -0
  40. scaler/config/section/cluster.py +55 -0
  41. scaler/config/section/ecs_worker_adapter.py +85 -0
  42. scaler/config/section/native_worker_adapter.py +43 -0
  43. scaler/config/section/object_storage_server.py +8 -0
  44. scaler/config/section/scheduler.py +54 -0
  45. scaler/config/section/symphony_worker_adapter.py +47 -0
  46. scaler/config/section/top.py +13 -0
  47. scaler/config/section/webui.py +21 -0
  48. scaler/config/types/__init__.py +0 -0
  49. scaler/config/types/network_backend.py +12 -0
  50. scaler/config/types/object_storage_server.py +45 -0
  51. scaler/config/types/worker.py +62 -0
  52. scaler/config/types/zmq.py +83 -0
  53. scaler/entry_points/__init__.py +0 -0
  54. scaler/entry_points/cluster.py +133 -0
  55. scaler/entry_points/object_storage_server.py +45 -0
  56. scaler/entry_points/scheduler.py +144 -0
  57. scaler/entry_points/top.py +286 -0
  58. scaler/entry_points/webui.py +48 -0
  59. scaler/entry_points/worker_adapter_ecs.py +191 -0
  60. scaler/entry_points/worker_adapter_native.py +137 -0
  61. scaler/entry_points/worker_adapter_symphony.py +98 -0
  62. scaler/io/__init__.py +0 -0
  63. scaler/io/async_binder.py +89 -0
  64. scaler/io/async_connector.py +95 -0
  65. scaler/io/async_object_storage_connector.py +225 -0
  66. scaler/io/mixins.py +154 -0
  67. scaler/io/sync_connector.py +68 -0
  68. scaler/io/sync_object_storage_connector.py +247 -0
  69. scaler/io/sync_subscriber.py +83 -0
  70. scaler/io/utility.py +80 -0
  71. scaler/io/ymq/__init__.py +0 -0
  72. scaler/io/ymq/_ymq.pyi +95 -0
  73. scaler/io/ymq/ymq.py +138 -0
  74. scaler/io/ymq_async_object_storage_connector.py +184 -0
  75. scaler/io/ymq_sync_object_storage_connector.py +184 -0
  76. scaler/object_storage/__init__.py +0 -0
  77. scaler/protocol/__init__.py +0 -0
  78. scaler/protocol/capnp/__init__.py +0 -0
  79. scaler/protocol/capnp/_python.py +6 -0
  80. scaler/protocol/capnp/common.capnp +68 -0
  81. scaler/protocol/capnp/message.capnp +218 -0
  82. scaler/protocol/capnp/object_storage.capnp +57 -0
  83. scaler/protocol/capnp/status.capnp +73 -0
  84. scaler/protocol/introduction.md +105 -0
  85. scaler/protocol/python/__init__.py +0 -0
  86. scaler/protocol/python/common.py +140 -0
  87. scaler/protocol/python/message.py +751 -0
  88. scaler/protocol/python/mixins.py +13 -0
  89. scaler/protocol/python/object_storage.py +118 -0
  90. scaler/protocol/python/status.py +279 -0
  91. scaler/protocol/worker.md +228 -0
  92. scaler/scheduler/__init__.py +0 -0
  93. scaler/scheduler/allocate_policy/__init__.py +0 -0
  94. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  95. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  96. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  97. scaler/scheduler/allocate_policy/mixins.py +55 -0
  98. scaler/scheduler/controllers/__init__.py +0 -0
  99. scaler/scheduler/controllers/balance_controller.py +65 -0
  100. scaler/scheduler/controllers/client_controller.py +131 -0
  101. scaler/scheduler/controllers/config_controller.py +31 -0
  102. scaler/scheduler/controllers/graph_controller.py +424 -0
  103. scaler/scheduler/controllers/information_controller.py +81 -0
  104. scaler/scheduler/controllers/mixins.py +194 -0
  105. scaler/scheduler/controllers/object_controller.py +147 -0
  106. scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
  107. scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
  108. scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
  109. scaler/scheduler/controllers/scaling_policies/null.py +14 -0
  110. scaler/scheduler/controllers/scaling_policies/types.py +9 -0
  111. scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
  112. scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
  113. scaler/scheduler/controllers/task_controller.py +376 -0
  114. scaler/scheduler/controllers/worker_controller.py +169 -0
  115. scaler/scheduler/object_usage/__init__.py +0 -0
  116. scaler/scheduler/object_usage/object_tracker.py +131 -0
  117. scaler/scheduler/scheduler.py +251 -0
  118. scaler/scheduler/task/__init__.py +0 -0
  119. scaler/scheduler/task/task_state_machine.py +92 -0
  120. scaler/scheduler/task/task_state_manager.py +61 -0
  121. scaler/ui/__init__.py +0 -0
  122. scaler/ui/constants.py +9 -0
  123. scaler/ui/live_display.py +147 -0
  124. scaler/ui/memory_window.py +146 -0
  125. scaler/ui/setting_page.py +40 -0
  126. scaler/ui/task_graph.py +832 -0
  127. scaler/ui/task_log.py +107 -0
  128. scaler/ui/utility.py +66 -0
  129. scaler/ui/webui.py +147 -0
  130. scaler/ui/worker_processors.py +104 -0
  131. scaler/utility/__init__.py +0 -0
  132. scaler/utility/debug.py +19 -0
  133. scaler/utility/event_list.py +63 -0
  134. scaler/utility/event_loop.py +58 -0
  135. scaler/utility/exceptions.py +42 -0
  136. scaler/utility/formatter.py +44 -0
  137. scaler/utility/graph/__init__.py +0 -0
  138. scaler/utility/graph/optimization.py +27 -0
  139. scaler/utility/graph/topological_sorter.py +11 -0
  140. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  141. scaler/utility/identifiers.py +107 -0
  142. scaler/utility/logging/__init__.py +0 -0
  143. scaler/utility/logging/decorators.py +25 -0
  144. scaler/utility/logging/scoped_logger.py +33 -0
  145. scaler/utility/logging/utility.py +183 -0
  146. scaler/utility/many_to_many_dict.py +123 -0
  147. scaler/utility/metadata/__init__.py +0 -0
  148. scaler/utility/metadata/profile_result.py +31 -0
  149. scaler/utility/metadata/task_flags.py +30 -0
  150. scaler/utility/mixins.py +13 -0
  151. scaler/utility/network_util.py +7 -0
  152. scaler/utility/one_to_many_dict.py +72 -0
  153. scaler/utility/queues/__init__.py +0 -0
  154. scaler/utility/queues/async_indexed_queue.py +37 -0
  155. scaler/utility/queues/async_priority_queue.py +70 -0
  156. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  157. scaler/utility/queues/indexed_queue.py +114 -0
  158. scaler/utility/serialization.py +9 -0
  159. scaler/version.txt +1 -0
  160. scaler/worker/__init__.py +0 -0
  161. scaler/worker/agent/__init__.py +0 -0
  162. scaler/worker/agent/heartbeat_manager.py +107 -0
  163. scaler/worker/agent/mixins.py +137 -0
  164. scaler/worker/agent/processor/__init__.py +0 -0
  165. scaler/worker/agent/processor/object_cache.py +107 -0
  166. scaler/worker/agent/processor/processor.py +285 -0
  167. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  168. scaler/worker/agent/processor_holder.py +147 -0
  169. scaler/worker/agent/processor_manager.py +369 -0
  170. scaler/worker/agent/profiling_manager.py +109 -0
  171. scaler/worker/agent/task_manager.py +150 -0
  172. scaler/worker/agent/timeout_manager.py +19 -0
  173. scaler/worker/preload.py +84 -0
  174. scaler/worker/worker.py +265 -0
  175. scaler/worker_adapter/__init__.py +0 -0
  176. scaler/worker_adapter/common.py +26 -0
  177. scaler/worker_adapter/ecs.py +269 -0
  178. scaler/worker_adapter/native.py +155 -0
  179. scaler/worker_adapter/symphony/__init__.py +0 -0
  180. scaler/worker_adapter/symphony/callback.py +45 -0
  181. scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
  182. scaler/worker_adapter/symphony/message.py +24 -0
  183. scaler/worker_adapter/symphony/task_manager.py +289 -0
  184. scaler/worker_adapter/symphony/worker.py +204 -0
  185. scaler/worker_adapter/symphony/worker_adapter.py +139 -0
  186. src/scaler/io/ymq/_ymq.so +0 -0
  187. src/scaler/object_storage/object_storage_server.so +0 -0
@@ -0,0 +1,30 @@
1
+ import dataclasses
2
+ import struct
3
+
4
+ from scaler.protocol.python.message import Task
5
+
6
+
7
@dataclasses.dataclass
class TaskFlags:
    """Per-task options carried inside ``Task.metadata`` as a fixed-size packed blob."""

    profiling: bool = True
    priority: int = 0
    stream_output: bool = False

    # network byte order: bool, int32, bool — must stay in field declaration order
    FORMAT = "!?i?"

    def serialize(self) -> bytes:
        """Pack the flags into the binary layout described by FORMAT."""
        return struct.pack(TaskFlags.FORMAT, self.profiling, self.priority, self.stream_output)

    @staticmethod
    def deserialize(data: bytes) -> "TaskFlags":
        """Rebuild a TaskFlags from bytes previously produced by serialize()."""
        profiling, priority, stream_output = struct.unpack(TaskFlags.FORMAT, data)
        return TaskFlags(profiling, priority, stream_output)
21
+
22
+
23
def retrieve_task_flags_from_task(task: Task) -> TaskFlags:
    """Decode the TaskFlags packed into ``task.metadata``.

    Returns default flags when the task carries no metadata.

    Raises:
        ValueError: if the metadata bytes do not match the TaskFlags layout.
    """
    if task.metadata == b"":
        return TaskFlags()

    try:
        return TaskFlags.deserialize(task.metadata)
    except struct.error as error:
        # chain the original struct error so the root cause stays visible in tracebacks
        raise ValueError(f"unexpected metadata value (expected {TaskFlags.__name__}).") from error
@@ -0,0 +1,13 @@
1
+ import abc
2
+
3
+
4
class Looper(metaclass=abc.ABCMeta):
    """Interface for components driven by a repeatedly invoked async routine
    (presumably on a fixed interval — the driver is not part of this module)."""

    @abc.abstractmethod
    async def routine(self):
        """Run one iteration of the component's recurring work."""
        raise NotImplementedError()
8
+
9
+
10
class Reporter(metaclass=abc.ABCMeta):
    """Interface for components that can produce a status snapshot for monitoring."""

    @abc.abstractmethod
    def get_status(self):
        """Return the component's current status object."""
        raise NotImplementedError()
@@ -0,0 +1,7 @@
1
+ import socket
2
+
3
+
4
def get_available_tcp_port(hostname: str = "127.0.0.1") -> int:
    """Return a TCP port that was free on *hostname* at the time of the call.

    Binding to port 0 makes the OS choose an unused port. The probe socket is
    closed before returning, so the port is very likely — but not guaranteed —
    to still be free when the caller binds it.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind((hostname, 0))
        _, port = sock.getsockname()
    return port
@@ -0,0 +1,72 @@
1
+ from typing import Dict, Generic, Set, TypeVar
2
+
3
KeyT = TypeVar("KeyT")
ValueT = TypeVar("ValueT")


class OneToManyDict(Generic[KeyT, ValueT]):
    """A bidirectional one-to-many mapping: each key owns a set of values, and
    every value belongs to exactly one key."""

    def __init__(self):
        # forward mapping: key -> set of values owned by that key
        self._key_to_value_set: Dict[KeyT, Set[ValueT]] = dict()
        # reverse index: value -> its unique owning key
        self._value_to_key: Dict[ValueT, KeyT] = dict()

    def __contains__(self, key) -> bool:
        return self.has_key(key)

    def keys(self):
        """View of all keys."""
        return self._key_to_value_set.keys()

    def values(self):
        """View of the per-key value sets (one set per key)."""
        return self._key_to_value_set.values()

    def items(self):
        """View of (key, value set) pairs."""
        return self._key_to_value_set.items()

    def add(self, key: KeyT, value: ValueT):
        """Associate *value* with *key*.

        Raises ValueError if *value* already belongs to a different key.
        """
        if value in self._value_to_key and self._value_to_key[value] != key:
            raise ValueError("value has to be unique in OneToManyDict")

        self._value_to_key[value] = key
        self._key_to_value_set.setdefault(key, set()).add(value)

    def has_key(self, key: KeyT) -> bool:
        return key in self._key_to_value_set

    def has_value(self, value: ValueT) -> bool:
        return value in self._value_to_key

    def get_key(self, value: ValueT) -> KeyT:
        """Return the key owning *value*; raises ValueError if unknown."""
        if value not in self._value_to_key:
            raise ValueError(f"cannot find {value=} in OneToManyDict")
        return self._value_to_key[value]

    def get_values(self, key: KeyT) -> Set[ValueT]:
        """Return the set of values owned by *key*; raises ValueError if unknown."""
        if key not in self._key_to_value_set:
            raise ValueError(f"cannot find {key=} in OneToManyDict")
        return self._key_to_value_set[key]

    def remove_key(self, key: KeyT) -> Set[ValueT]:
        """Remove *key* and all of its values; returns the removed value set."""
        if key not in self._key_to_value_set:
            raise KeyError(f"cannot find {key=} in OneToManyDict")

        removed_values = self._key_to_value_set.pop(key)
        for removed_value in removed_values:
            del self._value_to_key[removed_value]
        return removed_values

    def remove_value(self, value: ValueT) -> KeyT:
        """Detach *value* from its key; the key is dropped once its set empties.

        Returns the key the value belonged to; raises ValueError if unknown.
        """
        if value not in self._value_to_key:
            raise ValueError(f"cannot find {value=} in OneToManyDict")

        owner = self._value_to_key.pop(value)
        remaining = self._key_to_value_set[owner]
        remaining.discard(value)
        if not remaining:
            del self._key_to_value_set[owner]
        return owner
File without changes
@@ -0,0 +1,37 @@
1
+ from asyncio import Queue, QueueEmpty
2
+ from typing import Generic, TypeVar
3
+
4
+ from scaler.utility.queues.indexed_queue import IndexedQueue
5
+
6
+ ItemType = TypeVar("ItemType")
7
+
8
+
9
class AsyncIndexedQueue(Queue, Generic[ItemType]):
    """This should have same set of features as asyncio.Queue, with additional methods like remove
    - it behaves like regular async queue, except:
    - all the items pushed to queue should be hashable
    - those items should be unique in queue
    - IndexedQueue.put(), IndexedQueue.get(), IndexedQueue.remove() should all take O(1) time complexity
    """

    def __contains__(self, item: ItemType):
        # O(1): delegated to IndexedQueue's internal item->node map
        return item in self._queue

    def __len__(self):
        return self._queue.__len__()

    def _init(self, maxsize):
        # asyncio.Queue override hook: back the queue with an IndexedQueue
        # instead of the default deque, which is what enables O(1) remove()
        self._queue = IndexedQueue()

    def _put(self, item: ItemType):
        # asyncio.Queue override hook; IndexedQueue raises KeyError on duplicates
        self._queue.put(item)

    def _get(self):
        try:
            return self._queue.get()
        except IndexError:
            # normalize IndexedQueue's empty-queue IndexError to the asyncio convention
            raise QueueEmpty(f"{self.__class__.__name__} queue empty")

    def remove(self, item: ItemType):
        """remove the item in the queue in O(1) time complexity"""
        self._queue.remove(item)
@@ -0,0 +1,70 @@
1
+ import heapq
2
+ import sys
3
+ from asyncio import Queue
4
+ from typing import Any, Dict, List, Tuple, Union
5
+
6
+ PriorityType = Union[int, Tuple["PriorityType", ...]]
7
+
8
+
9
class AsyncPriorityQueue(Queue):
    """A subclass of Queue; retrieves entries in priority order (lowest first).

    Entries are typically list of the form: [priority, data].
    """

    def __len__(self):
        return len(self._queue)

    def _init(self, maxsize):
        # asyncio.Queue override hook: a heap of [priority, data] lists plus a
        # locator index so remove()/decrease_priority() can find a live entry.
        # NOTE(review): keys of _locator are whatever callers use as `data`
        # (AsyncSortedPriorityQueue stores tuples), not only bytes.
        self._queue: List[List] = []
        self._locator: Dict[Any, List] = {}

    def _put(self, item):
        # entries must be mutable lists: remove()/decrease_priority() rewrite
        # the priority slot (item[0]) in place
        if not isinstance(item, list):
            item = list(item)

        heapq.heappush(self._queue, item)
        self._locator[item[1]] = item

    def _get(self):
        priority, data = heapq.heappop(self._queue)
        self._locator.pop(data)
        return priority, data

    def remove(self, data):
        # O(n) overall. Lazily marking entries invalid is not a good idea here
        # (invalid entries would linger in the heap forever), so instead we
        # overwrite the entry's priority with the lowest possible value, restore
        # the heap invariant with heapq's internal _siftdown, and pop it off the top.
        item = self._locator.pop(data)
        i = self._queue.index(item)  # O(n)
        item[0] = self.__to_lowest_priority(item[0])
        heapq._siftdown(self._queue, 0, i)  # type: ignore[attr-defined]
        assert heapq.heappop(self._queue) == item

    def decrease_priority(self, data):
        # O(n) due to the index() scan; same in-place priority rewrite plus
        # heapq._siftdown trick as remove(), but the entry stays in the heap
        item = self._locator[data]
        i = self._queue.index(item)  # O(n)
        item[0] = self.__to_lower_priority(item[0])
        heapq._siftdown(self._queue, 0, i)  # type: ignore[attr-defined]

    def max_priority_item(self) -> Tuple[PriorityType, Any]:
        """output the Tuple of top priority number and top priority item"""
        # peek at the heap top (lowest priority value) by popping and pushing back
        item = heapq.heappop(self._queue)
        heapq.heappush(self._queue, item)
        return item[0], item[1]

    @classmethod
    def __to_lowest_priority(cls, original_priority: PriorityType) -> PriorityType:
        # smallest representable priority, applied element-wise for tuple priorities
        if isinstance(original_priority, tuple):
            return tuple(cls.__to_lowest_priority(value) for value in original_priority)
        else:
            return -sys.maxsize - 1

    @classmethod
    def __to_lower_priority(cls, original_priority: PriorityType) -> PriorityType:
        # one step more urgent (lower value), applied element-wise for tuple priorities
        if isinstance(original_priority, tuple):
            return tuple(cls.__to_lower_priority(value) for value in original_priority)
        else:
            return original_priority - 1
@@ -0,0 +1,45 @@
1
+ from asyncio import Queue
2
+ from typing import Any, Dict
3
+
4
+ from scaler.utility.queues.async_priority_queue import AsyncPriorityQueue
5
+
6
+
7
class AsyncSortedPriorityQueue(Queue):
    """A subclass of Queue; retrieves entries in priority order (lowest first), and then by adding order.

    Entries are typically list of the form: [priority number, data].
    """

    def __len__(self):
        return len(self._queue)

    def _init(self, maxsize: int):
        # asyncio.Queue override hook; delegates ordering to an AsyncPriorityQueue
        self._queue = AsyncPriorityQueue()

        # Keeps an item count to assign monotonic integer to queued items, so to also keep the priority queue sorted by
        # adding order.
        # See https://docs.python.org/3/library/heapq.html#priority-queue-implementation-notes.
        self._item_counter: int = 0
        self._data_to_item_id: Dict[Any, int] = dict()

    def _put(self, item) -> None:
        priority, data = item

        if data in self._data_to_item_id:
            raise ValueError(f"item `{data}` already in the queue")

        item_id = self._item_counter
        self._item_counter += 1

        # calls the inner queue's _put hook directly (not put()) because the
        # asyncio bookkeeping is already handled by this Queue subclass;
        # (item_id, data) ties insertion order into the heap comparison
        self._queue._put([priority, (item_id, data)])
        self._data_to_item_id[data] = item_id

    def _get(self):
        priority, (_, data) = self._queue._get()
        self._data_to_item_id.pop(data)

        # the item_id is an internal detail: callers get back [priority, data]
        return [priority, data]

    def remove(self, data: Any) -> None:
        """Remove *data* from the queue; raises KeyError if it is not queued."""
        item_id = self._data_to_item_id.pop(data)
        self._queue.remove((item_id, data))
@@ -0,0 +1,114 @@
1
+ import dataclasses
2
+ from typing import Any, Dict, Hashable, Optional
3
+
4
+
5
@dataclasses.dataclass
class _Node:
    # one element of the doubly linked list backing IndexedQueue
    value: Any
    prev: Optional["_Node"] = None
    next: Optional["_Node"] = None


class _DoubleLinkedList:
    """Minimal doubly linked list: push at head, pop at tail, O(1) unlink of any node."""

    def __init__(self):
        self._head: Optional[_Node] = None
        self._tail: Optional[_Node] = None
        self._size = 0

    def __len__(self):
        return self._size

    def add_to_head(self, node: _Node):
        """Prepend *node* in O(1)."""
        if self._head is None:
            self._head = node
            self._tail = node
        else:
            node.next = self._head
            self._head.prev = node
            self._head = node

        self._size += 1

    def remove_tail(self):
        """Unlink and return the tail node in O(1); raises IndexError when empty."""
        if self._tail is None:
            raise IndexError(f"{self.__class__.__name__} queue empty")

        node = self._tail
        if self._tail.prev is None:
            # single-element list becomes empty
            self._head = None
            self._tail = None
        else:
            self._tail = self._tail.prev
            self._tail.next = None

        self._size -= 1
        node.prev = None  # detach so the removed node holds no list references
        return node

    def remove(self, node: _Node):
        """Unlink *node* (which must belong to this list) in O(1)."""
        prev_node = node.prev
        next_node = node.next
        if prev_node and next_node:
            # interior node
            prev_node.next = next_node
            next_node.prev = prev_node
        elif not prev_node and not next_node:
            # only node in the list
            assert self._head is node
            assert self._tail is node
            self._head = None
            self._tail = None
        elif prev_node and not next_node:
            # tail node
            assert self._tail is node
            prev_node.next = None
            self._tail = prev_node
        else:
            # head node
            assert self._head is node
            next_node.prev = None
            self._head = next_node

        self._size -= 1
        # detach so the removed node holds no list references
        node.prev = None
        node.next = None


class IndexedQueue:
    """A FIFO queue that provides O(1) operations for adding and removing any item.

    Items must be hashable and unique within the queue.
    """

    def __init__(self):
        self._double_linked_list = _DoubleLinkedList()
        # item -> its list node. Keyed by the item itself, NOT by hash(item):
        # distinct items can share a hash (e.g. hash(-1) == hash(-2) in CPython),
        # and keying on the raw hash would wrongly conflate such items.
        self._item_to_node: Dict[Hashable, _Node] = {}

    def __contains__(self, item: Hashable):
        return item in self._item_to_node

    def __len__(self):
        return self._double_linked_list.__len__()

    def __iter__(self):
        # oldest first (next to be dequeued): walk from tail towards head
        node = self._double_linked_list._tail
        while node is not None:
            yield node.value
            node = node.prev

    def put(self, item: Hashable):
        """Enqueue *item*; raises KeyError if it is already queued."""
        if item in self._item_to_node:
            raise KeyError(f"{self.__class__.__name__} already have item: {item}")

        node = _Node(item)
        self._double_linked_list.add_to_head(node)
        self._item_to_node[item] = node

    def get(self):
        """Dequeue and return the oldest item; raises IndexError when empty."""
        node = self._double_linked_list.remove_tail()
        del self._item_to_node[node.value]
        return node.value

    def remove(self, item: Hashable):
        """Remove *item* from anywhere in the queue in O(1); raises ValueError if absent."""
        if item not in self._item_to_node:
            raise ValueError(f"{self.__class__.__name__} doesn't have item: {item}")

        node = self._item_to_node.pop(item)
        self._double_linked_list.remove(node)
@@ -0,0 +1,9 @@
1
+ import pickle
2
+
3
+
4
def serialize_failure(exp: Exception) -> bytes:
    """Pickle an exception so it can be shipped across process boundaries."""
    return pickle.dumps(exp, pickle.HIGHEST_PROTOCOL)
6
+
7
+
8
def deserialize_failure(result: bytes) -> Exception:
    """Reconstruct an exception previously produced by serialize_failure().

    NOTE(review): pickle.loads must only ever see trusted, internally produced
    bytes — unpickling untrusted data can execute arbitrary code.
    """
    return pickle.loads(result)
scaler/version.txt ADDED
@@ -0,0 +1 @@
1
+ 1.12.28
File without changes
File without changes
@@ -0,0 +1,107 @@
1
+ import time
2
+ from typing import Dict, Optional
3
+
4
+ import psutil
5
+
6
+ from scaler.config.types.object_storage_server import ObjectStorageConfig
7
+ from scaler.io.mixins import AsyncConnector, AsyncObjectStorageConnector
8
+ from scaler.protocol.python.message import Resource, WorkerHeartbeat, WorkerHeartbeatEcho
9
+ from scaler.protocol.python.status import ProcessorStatus
10
+ from scaler.utility.mixins import Looper
11
+ from scaler.worker.agent.mixins import HeartbeatManager, ProcessorManager, TaskManager, TimeoutManager
12
+ from scaler.worker.agent.processor_holder import ProcessorHolder
13
+
14
+
15
class VanillaHeartbeatManager(Looper, HeartbeatManager):
    """Sends WorkerHeartbeat messages to the scheduler and handles the echoes.

    Tracks one-way latency from the heartbeat round trip, refreshes the timeout
    manager on every echo, and — when no object storage address was configured —
    adopts the address carried by the first echo and connects to it.
    """

    def __init__(
        self, object_storage_address: Optional[ObjectStorageConfig], capabilities: Dict[str, int], task_queue_size: int
    ):
        self._agent_process = psutil.Process()
        self._capabilities = capabilities
        self._task_queue_size = task_queue_size

        # collaborators are wired in later via register()
        self._connector_external: Optional[AsyncConnector] = None
        self._connector_storage: Optional[AsyncObjectStorageConnector] = None
        self._worker_task_manager: Optional[TaskManager] = None
        self._timeout_manager: Optional[TimeoutManager] = None
        self._processor_manager: Optional[ProcessorManager] = None

        # ns timestamp of the in-flight heartbeat; 0 means no heartbeat outstanding
        self._start_timestamp_ns = 0
        self._latency_us = 0

        # None until configured or learned from the first heartbeat echo
        self._object_storage_address: Optional[ObjectStorageConfig] = object_storage_address

    def register(
        self,
        connector_external: AsyncConnector,
        connector_storage: AsyncObjectStorageConnector,
        worker_task_manager: TaskManager,
        timeout_manager: TimeoutManager,
        processor_manager: ProcessorManager,
    ):
        """Wire in the collaborators; must be called before the routine runs."""
        self._connector_external = connector_external
        self._connector_storage = connector_storage
        self._worker_task_manager = worker_task_manager
        self._timeout_manager = timeout_manager
        self._processor_manager = processor_manager

    async def on_heartbeat_echo(self, heartbeat: WorkerHeartbeatEcho):
        """Handle the scheduler's echo of our last heartbeat."""
        if self._start_timestamp_ns == 0:
            # not handling echo if we didn't send out heartbeat
            return

        # half the round trip time, converted from ns to us
        self._latency_us = int(((time.time_ns() - self._start_timestamp_ns) / 2) // 1_000)
        self._start_timestamp_ns = 0
        self._timeout_manager.update_last_seen_time()

        if self._object_storage_address is None:
            # adopt the storage address advertised by the scheduler and connect once
            address_message = heartbeat.object_storage_address()
            self._object_storage_address = ObjectStorageConfig(address_message.host, address_message.port)
            await self._connector_storage.connect(self._object_storage_address.host, self._object_storage_address.port)

    async def routine(self):
        """One heartbeat cycle: reap dead processors, then send a WorkerHeartbeat."""
        processors = self._processor_manager.processors()

        if self._start_timestamp_ns != 0:
            # already sent heartbeat, expecting heartbeat echo, so not sending
            return

        # report zombie/dead processors so the manager can clean them up
        for processor_holder in processors:
            status = processor_holder.process().status()
            if status in {psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD}:
                await self._processor_manager.on_failing_processor(processor_holder.processor_id(), status)

        processors = self._processor_manager.processors()  # refreshes for removed dead and zombie processors
        num_suspended_processors = self._processor_manager.num_suspended_processors()

        # TODO: add task queue size to WorkerHeartbeat
        await self._connector_external.send(
            WorkerHeartbeat.new_msg(
                Resource.new_msg(int(self._agent_process.cpu_percent() * 10), self._agent_process.memory_info().rss),
                psutil.virtual_memory().available,
                self._task_queue_size,
                self._worker_task_manager.get_queued_size() - num_suspended_processors,
                self._latency_us,
                self._processor_manager.can_accept_task(),
                [self.__get_processor_status_from_holder(processor) for processor in processors],
                self._capabilities,
            )
        )
        self._start_timestamp_ns = time.time_ns()

    def get_object_storage_address(self) -> Optional[ObjectStorageConfig]:
        """Return the storage address, or None if not configured and no echo received yet."""
        return self._object_storage_address

    @staticmethod
    def __get_processor_status_from_holder(processor: ProcessorHolder) -> ProcessorStatus:
        # snapshot one processor's resource usage and lifecycle flags
        process = processor.process()

        try:
            resource = Resource.new_msg(int(process.cpu_percent() * 10), process.memory_info().rss)
        except psutil.ZombieProcess:
            # Assumes dead processes do not use any resources
            resource = Resource.new_msg(0, 0)

        return ProcessorStatus.new_msg(
            processor.pid(), processor.initialized(), processor.task() is not None, processor.suspended(), resource
        )
@@ -0,0 +1,137 @@
1
+ import abc
2
+ from typing import List, Optional
3
+
4
+ from scaler.config.types.object_storage_server import ObjectStorageConfig
5
+ from scaler.protocol.python.message import (
6
+ ObjectInstruction,
7
+ ProcessorInitialized,
8
+ Task,
9
+ TaskCancel,
10
+ TaskResult,
11
+ WorkerHeartbeatEcho,
12
+ )
13
+ from scaler.utility.identifiers import ProcessorID, TaskID
14
+ from scaler.utility.metadata.profile_result import ProfileResult
15
+ from scaler.worker.agent.processor_holder import ProcessorHolder
16
+
17
+
18
class HeartbeatManager(metaclass=abc.ABCMeta):
    """Interface of the worker-agent component that exchanges heartbeats with the scheduler."""

    @abc.abstractmethod
    async def on_heartbeat_echo(self, heartbeat: WorkerHeartbeatEcho):
        """Handle the scheduler's echo of a previously sent heartbeat."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_object_storage_address(self) -> Optional[ObjectStorageConfig]:
        """Return the object storage address, or None while it is not yet known.

        Optional because an implementation may have no address until the first
        heartbeat echo supplies one (see the vanilla heartbeat manager).
        """
        raise NotImplementedError()
26
+
27
+
28
class TimeoutManager(metaclass=abc.ABCMeta):
    """Interface for tracking liveness of the scheduler connection."""

    @abc.abstractmethod
    def update_last_seen_time(self):
        """Record that the scheduler was just heard from (called on each heartbeat echo)."""
        raise NotImplementedError()
32
+
33
+
34
class TaskManager(metaclass=abc.ABCMeta):
    """Interface of the worker-agent component that queues tasks and forwards results."""

    @abc.abstractmethod
    async def on_task_new(self, task: Task):
        """Accept a newly assigned task from the scheduler."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_result(self, result: TaskResult):
        """Handle the result of a finished task."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_cancel_task(self, task_cancel: TaskCancel):
        """Handle a cancel request for a queued or running task."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_queued_size(self):
        """Return the number of queued tasks (reported to the scheduler in heartbeats)."""
        raise NotImplementedError()
50
+
51
+
52
class ProcessorManager(metaclass=abc.ABCMeta):
    """Interface of the worker-agent component that owns processor processes and
    routes tasks, results and object instructions between them and the agent."""

    @abc.abstractmethod
    def can_accept_task(self) -> bool:
        """Return True if a new task can currently be accepted (reported in heartbeats)."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def wait_until_can_accept_task(self):
        """Wait until a new task could be accepted."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_processor_initialized(self, processor_id: ProcessorID, processor_initialized: ProcessorInitialized):
        """Handle the ProcessorInitialized message sent by processor *processor_id*."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task(self, task: Task) -> bool:
        """Dispatch *task* to a processor; bool result semantics are implementation-defined."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_cancel_task(self, task_id: TaskID) -> Optional[Task]:
        """Cancel *task_id*; returns the cancelled Task, or None (presumably when unknown)."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_failing_processor(self, processor_id: ProcessorID, process_status: str):
        """Handle a processor found in a bad state; *process_status* is a psutil
        status string (e.g. zombie or dead, as detected by the heartbeat routine)."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_suspend_task(self, task_id: TaskID) -> bool:
        """Suspend the processor running *task_id*; bool result is implementation-defined."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_resume_task(self, task_id: TaskID) -> bool:
        """Resume the suspended task *task_id*; bool result is implementation-defined."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_task_result(self, processor_id: ProcessorID, task_result: TaskResult):
        """Handle a task result produced by processor *processor_id*."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_external_object_instruction(self, instruction: ObjectInstruction):
        """Handle an object instruction arriving from outside the worker."""
        raise NotImplementedError()

    @abc.abstractmethod
    async def on_internal_object_instruction(self, processor_id: ProcessorID, instruction: ObjectInstruction):
        """Handle an object instruction issued by local processor *processor_id*."""
        raise NotImplementedError()

    @abc.abstractmethod
    def destroy(self, reason: str):
        """Tear down all managed processors; *reason* is recorded by implementations."""
        raise NotImplementedError()

    @abc.abstractmethod
    def current_processor_is_initialized(self) -> bool:
        """Return whether the current processor has completed initialization."""
        raise NotImplementedError()

    @abc.abstractmethod
    def current_task(self) -> Optional[Task]:
        """Return the task currently being executed, or None."""
        raise NotImplementedError()

    @abc.abstractmethod
    def current_task_id(self) -> TaskID:
        """Return the id of the task currently being executed."""
        raise NotImplementedError()

    @abc.abstractmethod
    def processors(self) -> List[ProcessorHolder]:
        """Return holders for all live processors (polled by the heartbeat routine)."""
        raise NotImplementedError()

    @abc.abstractmethod
    def num_suspended_processors(self) -> int:
        """Return the count of suspended processors (subtracted from queued size in heartbeats)."""
        raise NotImplementedError()
120
+
121
+
122
class ProfilingManager(metaclass=abc.ABCMeta):
    """Interface for collecting per-process, per-task profiling data."""

    @abc.abstractmethod
    def on_process_start(self, pid: int):
        """Begin tracking the process with *pid*."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_process_end(self, pid: int):
        """Stop tracking the process with *pid*."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_task_start(self, pid: int, task_id: TaskID):
        """Mark that *task_id* started executing in process *pid*."""
        raise NotImplementedError()

    @abc.abstractmethod
    def on_task_end(self, pid: int, task_id: TaskID) -> ProfileResult:
        """Mark that *task_id* finished in process *pid*; return its profiling result."""
        raise NotImplementedError()
File without changes