opengris-scaler 1.12.28__cp313-cp313-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of opengris-scaler might be problematic. Click here for more details.

Files changed (187) hide show
  1. opengris_scaler-1.12.28.dist-info/METADATA +728 -0
  2. opengris_scaler-1.12.28.dist-info/RECORD +187 -0
  3. opengris_scaler-1.12.28.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.28.dist-info/entry_points.txt +10 -0
  5. opengris_scaler-1.12.28.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.28.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.28.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/__init__.py +14 -0
  13. scaler/about.py +5 -0
  14. scaler/client/__init__.py +0 -0
  15. scaler/client/agent/__init__.py +0 -0
  16. scaler/client/agent/client_agent.py +210 -0
  17. scaler/client/agent/disconnect_manager.py +27 -0
  18. scaler/client/agent/future_manager.py +112 -0
  19. scaler/client/agent/heartbeat_manager.py +74 -0
  20. scaler/client/agent/mixins.py +89 -0
  21. scaler/client/agent/object_manager.py +98 -0
  22. scaler/client/agent/task_manager.py +64 -0
  23. scaler/client/client.py +658 -0
  24. scaler/client/future.py +252 -0
  25. scaler/client/object_buffer.py +129 -0
  26. scaler/client/object_reference.py +25 -0
  27. scaler/client/serializer/__init__.py +0 -0
  28. scaler/client/serializer/default.py +16 -0
  29. scaler/client/serializer/mixins.py +38 -0
  30. scaler/cluster/__init__.py +0 -0
  31. scaler/cluster/cluster.py +115 -0
  32. scaler/cluster/combo.py +150 -0
  33. scaler/cluster/object_storage_server.py +45 -0
  34. scaler/cluster/scheduler.py +86 -0
  35. scaler/config/__init__.py +0 -0
  36. scaler/config/defaults.py +94 -0
  37. scaler/config/loader.py +96 -0
  38. scaler/config/mixins.py +20 -0
  39. scaler/config/section/__init__.py +0 -0
  40. scaler/config/section/cluster.py +55 -0
  41. scaler/config/section/ecs_worker_adapter.py +85 -0
  42. scaler/config/section/native_worker_adapter.py +43 -0
  43. scaler/config/section/object_storage_server.py +8 -0
  44. scaler/config/section/scheduler.py +54 -0
  45. scaler/config/section/symphony_worker_adapter.py +47 -0
  46. scaler/config/section/top.py +13 -0
  47. scaler/config/section/webui.py +21 -0
  48. scaler/config/types/__init__.py +0 -0
  49. scaler/config/types/network_backend.py +12 -0
  50. scaler/config/types/object_storage_server.py +45 -0
  51. scaler/config/types/worker.py +62 -0
  52. scaler/config/types/zmq.py +83 -0
  53. scaler/entry_points/__init__.py +0 -0
  54. scaler/entry_points/cluster.py +133 -0
  55. scaler/entry_points/object_storage_server.py +45 -0
  56. scaler/entry_points/scheduler.py +144 -0
  57. scaler/entry_points/top.py +286 -0
  58. scaler/entry_points/webui.py +48 -0
  59. scaler/entry_points/worker_adapter_ecs.py +191 -0
  60. scaler/entry_points/worker_adapter_native.py +137 -0
  61. scaler/entry_points/worker_adapter_symphony.py +98 -0
  62. scaler/io/__init__.py +0 -0
  63. scaler/io/async_binder.py +89 -0
  64. scaler/io/async_connector.py +95 -0
  65. scaler/io/async_object_storage_connector.py +225 -0
  66. scaler/io/mixins.py +154 -0
  67. scaler/io/sync_connector.py +68 -0
  68. scaler/io/sync_object_storage_connector.py +247 -0
  69. scaler/io/sync_subscriber.py +83 -0
  70. scaler/io/utility.py +80 -0
  71. scaler/io/ymq/__init__.py +0 -0
  72. scaler/io/ymq/_ymq.pyi +95 -0
  73. scaler/io/ymq/ymq.py +138 -0
  74. scaler/io/ymq_async_object_storage_connector.py +184 -0
  75. scaler/io/ymq_sync_object_storage_connector.py +184 -0
  76. scaler/object_storage/__init__.py +0 -0
  77. scaler/protocol/__init__.py +0 -0
  78. scaler/protocol/capnp/__init__.py +0 -0
  79. scaler/protocol/capnp/_python.py +6 -0
  80. scaler/protocol/capnp/common.capnp +68 -0
  81. scaler/protocol/capnp/message.capnp +218 -0
  82. scaler/protocol/capnp/object_storage.capnp +57 -0
  83. scaler/protocol/capnp/status.capnp +73 -0
  84. scaler/protocol/introduction.md +105 -0
  85. scaler/protocol/python/__init__.py +0 -0
  86. scaler/protocol/python/common.py +140 -0
  87. scaler/protocol/python/message.py +751 -0
  88. scaler/protocol/python/mixins.py +13 -0
  89. scaler/protocol/python/object_storage.py +118 -0
  90. scaler/protocol/python/status.py +279 -0
  91. scaler/protocol/worker.md +228 -0
  92. scaler/scheduler/__init__.py +0 -0
  93. scaler/scheduler/allocate_policy/__init__.py +0 -0
  94. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  95. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  96. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  97. scaler/scheduler/allocate_policy/mixins.py +55 -0
  98. scaler/scheduler/controllers/__init__.py +0 -0
  99. scaler/scheduler/controllers/balance_controller.py +65 -0
  100. scaler/scheduler/controllers/client_controller.py +131 -0
  101. scaler/scheduler/controllers/config_controller.py +31 -0
  102. scaler/scheduler/controllers/graph_controller.py +424 -0
  103. scaler/scheduler/controllers/information_controller.py +81 -0
  104. scaler/scheduler/controllers/mixins.py +194 -0
  105. scaler/scheduler/controllers/object_controller.py +147 -0
  106. scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
  107. scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
  108. scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
  109. scaler/scheduler/controllers/scaling_policies/null.py +14 -0
  110. scaler/scheduler/controllers/scaling_policies/types.py +9 -0
  111. scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
  112. scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
  113. scaler/scheduler/controllers/task_controller.py +376 -0
  114. scaler/scheduler/controllers/worker_controller.py +169 -0
  115. scaler/scheduler/object_usage/__init__.py +0 -0
  116. scaler/scheduler/object_usage/object_tracker.py +131 -0
  117. scaler/scheduler/scheduler.py +251 -0
  118. scaler/scheduler/task/__init__.py +0 -0
  119. scaler/scheduler/task/task_state_machine.py +92 -0
  120. scaler/scheduler/task/task_state_manager.py +61 -0
  121. scaler/ui/__init__.py +0 -0
  122. scaler/ui/constants.py +9 -0
  123. scaler/ui/live_display.py +147 -0
  124. scaler/ui/memory_window.py +146 -0
  125. scaler/ui/setting_page.py +40 -0
  126. scaler/ui/task_graph.py +832 -0
  127. scaler/ui/task_log.py +107 -0
  128. scaler/ui/utility.py +66 -0
  129. scaler/ui/webui.py +147 -0
  130. scaler/ui/worker_processors.py +104 -0
  131. scaler/utility/__init__.py +0 -0
  132. scaler/utility/debug.py +19 -0
  133. scaler/utility/event_list.py +63 -0
  134. scaler/utility/event_loop.py +58 -0
  135. scaler/utility/exceptions.py +42 -0
  136. scaler/utility/formatter.py +44 -0
  137. scaler/utility/graph/__init__.py +0 -0
  138. scaler/utility/graph/optimization.py +27 -0
  139. scaler/utility/graph/topological_sorter.py +11 -0
  140. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  141. scaler/utility/identifiers.py +107 -0
  142. scaler/utility/logging/__init__.py +0 -0
  143. scaler/utility/logging/decorators.py +25 -0
  144. scaler/utility/logging/scoped_logger.py +33 -0
  145. scaler/utility/logging/utility.py +183 -0
  146. scaler/utility/many_to_many_dict.py +123 -0
  147. scaler/utility/metadata/__init__.py +0 -0
  148. scaler/utility/metadata/profile_result.py +31 -0
  149. scaler/utility/metadata/task_flags.py +30 -0
  150. scaler/utility/mixins.py +13 -0
  151. scaler/utility/network_util.py +7 -0
  152. scaler/utility/one_to_many_dict.py +72 -0
  153. scaler/utility/queues/__init__.py +0 -0
  154. scaler/utility/queues/async_indexed_queue.py +37 -0
  155. scaler/utility/queues/async_priority_queue.py +70 -0
  156. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  157. scaler/utility/queues/indexed_queue.py +114 -0
  158. scaler/utility/serialization.py +9 -0
  159. scaler/version.txt +1 -0
  160. scaler/worker/__init__.py +0 -0
  161. scaler/worker/agent/__init__.py +0 -0
  162. scaler/worker/agent/heartbeat_manager.py +107 -0
  163. scaler/worker/agent/mixins.py +137 -0
  164. scaler/worker/agent/processor/__init__.py +0 -0
  165. scaler/worker/agent/processor/object_cache.py +107 -0
  166. scaler/worker/agent/processor/processor.py +285 -0
  167. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  168. scaler/worker/agent/processor_holder.py +147 -0
  169. scaler/worker/agent/processor_manager.py +369 -0
  170. scaler/worker/agent/profiling_manager.py +109 -0
  171. scaler/worker/agent/task_manager.py +150 -0
  172. scaler/worker/agent/timeout_manager.py +19 -0
  173. scaler/worker/preload.py +84 -0
  174. scaler/worker/worker.py +265 -0
  175. scaler/worker_adapter/__init__.py +0 -0
  176. scaler/worker_adapter/common.py +26 -0
  177. scaler/worker_adapter/ecs.py +269 -0
  178. scaler/worker_adapter/native.py +155 -0
  179. scaler/worker_adapter/symphony/__init__.py +0 -0
  180. scaler/worker_adapter/symphony/callback.py +45 -0
  181. scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
  182. scaler/worker_adapter/symphony/message.py +24 -0
  183. scaler/worker_adapter/symphony/task_manager.py +289 -0
  184. scaler/worker_adapter/symphony/worker.py +204 -0
  185. scaler/worker_adapter/symphony/worker_adapter.py +139 -0
  186. src/scaler/io/ymq/_ymq.so +0 -0
  187. src/scaler/object_storage/object_storage_server.so +0 -0
@@ -0,0 +1,13 @@
1
+ import abc
2
+ from typing import TypeVar
3
+
4
+
5
class Message(metaclass=abc.ABCMeta):
    """Base wrapper around an underlying protocol message object.

    The object handed to the constructor is stored unchanged and returned by
    :meth:`get_message`.
    """

    def __init__(self, msg):
        """Keep a reference to ``msg`` as the wrapped message."""
        self._msg = msg

    def get_message(self):
        """Return the wrapped message object given at construction time."""
        wrapped = self._msg
        return wrapped
11
+
12
+
13
+ MessageType = TypeVar("MessageType", bound=Message)  # type variable restricted to Message and its subclasses
@@ -0,0 +1,118 @@
1
+ import dataclasses
2
+ import enum
3
+ import struct
4
+
5
+ from scaler.protocol.capnp._python import _object_storage # noqa
6
+ from scaler.protocol.python.mixins import Message
7
+ from scaler.utility.identifiers import ObjectID
8
+
9
+ OBJECT_ID_FORMAT = "!QQQQ"  # struct format: network byte order, four unsigned 64-bit integers (32 bytes)
10
+
11
+
12
@dataclasses.dataclass
class ObjectRequestHeader(Message):
    """Typed accessor around a capnp ``ObjectRequestHeader`` message.

    NOTE(review): the class body declares no annotated fields, so the ``@dataclasses.dataclass`` decorator only
    contributes generated dunder methods (an ``__eq__`` with no fields to compare, and ``__hash__`` set to
    ``None``) -- confirm the decorator is intentional.
    """

    class ObjectRequestType(enum.Enum):
        """Request kinds; each member's value is the matching capnp enumerant."""

        SetObject = _object_storage.ObjectRequestHeader.ObjectRequestType.setObject
        GetObject = _object_storage.ObjectRequestHeader.ObjectRequestType.getObject
        DeleteObject = _object_storage.ObjectRequestHeader.ObjectRequestType.deleteObject
        DuplicateObjectID = _object_storage.ObjectRequestHeader.ObjectRequestType.duplicateObjectID

    # An explicit __init__ is required here: without it the dataclass decorator would generate a
    # zero-argument constructor.
    def __init__(self, msg):
        super().__init__(msg)

    @property
    def object_id(self) -> ObjectID:
        """The message's ``objectID`` converted with ``from_capnp_object_id``."""
        capnp_id = self._msg.objectID
        return from_capnp_object_id(capnp_id)

    @property
    def payload_length(self) -> int:
        """The message's ``payloadLength`` field."""
        return self._msg.payloadLength

    @property
    def request_id(self) -> int:
        """The message's ``requestID`` field."""
        return self._msg.requestID

    @property
    def request_type(self) -> ObjectRequestType:
        """The message's ``requestType``, looked up in ``ObjectRequestType`` by its raw value."""
        raw_value = self._msg.requestType.raw
        return ObjectRequestHeader.ObjectRequestType(raw_value)

    @staticmethod
    def new_msg(
        object_id: ObjectID, payload_length: int, request_id: int, request_type: ObjectRequestType
    ) -> "ObjectRequestHeader":
        """Build a new header from Python-level values.

        ``object_id`` is converted with ``to_capnp_object_id`` and ``request_type`` is stored through its
        enum value.
        """
        capnp_header = _object_storage.ObjectRequestHeader(
            objectID=to_capnp_object_id(object_id),
            payloadLength=payload_length,
            requestID=request_id,
            requestType=request_type.value,
        )
        return ObjectRequestHeader(capnp_header)

    def get_message(self):
        """Return the wrapped capnp message."""
        return self._msg
54
+
55
+
56
@dataclasses.dataclass
class ObjectResponseHeader(Message):
    """Typed accessor around a capnp ``ObjectResponseHeader`` message.

    NOTE(review): the class body declares no annotated fields, so the ``@dataclasses.dataclass`` decorator only
    contributes generated dunder methods (an ``__eq__`` with no fields to compare, and ``__hash__`` set to
    ``None``) -- confirm the decorator is intentional.
    """

    MESSAGE_LENGTH = 80  # there does not seem to be a way to statically know the size of a pycapnp message

    class ObjectResponseType(enum.Enum):
        """Response kinds; each member's value is the matching capnp enumerant."""

        SetOK = _object_storage.ObjectResponseHeader.ObjectResponseType.setOK
        GetOK = _object_storage.ObjectResponseHeader.ObjectResponseType.getOK
        DelOK = _object_storage.ObjectResponseHeader.ObjectResponseType.delOK
        DelNotExists = _object_storage.ObjectResponseHeader.ObjectResponseType.delNotExists
        DuplicateOK = _object_storage.ObjectResponseHeader.ObjectResponseType.duplicateOK

    # An explicit __init__ is required here: without it the dataclass decorator would generate a
    # zero-argument constructor.
    def __init__(self, msg):
        super().__init__(msg)

    @property
    def object_id(self) -> ObjectID:
        """The message's ``objectID`` converted with ``from_capnp_object_id``."""
        capnp_id = self._msg.objectID
        return from_capnp_object_id(capnp_id)

    @property
    def payload_length(self) -> int:
        """The message's ``payloadLength`` field."""
        return self._msg.payloadLength

    @property
    def response_id(self) -> int:
        """The message's ``responseID`` field."""
        return self._msg.responseID

    @property
    def response_type(self) -> ObjectResponseType:
        """The message's ``responseType``, looked up in ``ObjectResponseType`` by its raw value."""
        raw_value = self._msg.responseType.raw
        return ObjectResponseHeader.ObjectResponseType(raw_value)

    @staticmethod
    def new_msg(
        object_id: ObjectID, payload_length: int, response_id: int, response_type: ObjectResponseType
    ) -> "ObjectResponseHeader":
        """Build a new header from Python-level values.

        ``object_id`` is converted with ``to_capnp_object_id`` and ``response_type`` is stored through its
        enum value.
        """
        capnp_header = _object_storage.ObjectResponseHeader(
            objectID=to_capnp_object_id(object_id),
            payloadLength=payload_length,
            responseID=response_id,
            responseType=response_type.value,
        )
        return ObjectResponseHeader(capnp_header)

    def get_message(self):
        """Return the wrapped capnp message."""
        return self._msg
101
+
102
+
103
def to_capnp_object_id(object_id: ObjectID) -> _object_storage.ObjectID:
    """Convert an ``ObjectID`` into a capnp ``ObjectID`` struct.

    The identifier is unpacked with ``OBJECT_ID_FORMAT`` into four integers, which populate ``field0``
    through ``field3`` in order.
    """
    words = struct.unpack(OBJECT_ID_FORMAT, object_id)
    first, second, third, fourth = words
    return _object_storage.ObjectID(field0=first, field1=second, field2=third, field3=fourth)
107
+
108
+
109
def from_capnp_object_id(capnp_object_id: _object_storage.ObjectID) -> ObjectID:
    """Convert a capnp ``ObjectID`` struct back into an ``ObjectID``.

    ``field0`` through ``field3`` are packed, in order, with ``OBJECT_ID_FORMAT`` and the resulting bytes
    are passed to the ``ObjectID`` constructor.
    """
    words = (
        capnp_object_id.field0,
        capnp_object_id.field1,
        capnp_object_id.field2,
        capnp_object_id.field3,
    )
    packed = struct.pack(OBJECT_ID_FORMAT, *words)
    return ObjectID(packed)
@@ -0,0 +1,279 @@
1
+ from typing import Dict, List
2
+
3
+ from scaler.protocol.capnp._python import _status # noqa
4
+ from scaler.protocol.python.common import TaskState
5
+ from scaler.protocol.python.mixins import Message
6
+ from scaler.utility.identifiers import ClientID, WorkerID
7
+
8
+ CPU_MAXIMUM = 1000  # upper bound that Resource.new_msg applies to its cpu argument via min()
9
+ WorkerGroupID = bytes  # alias: worker group identifiers are plain bytes
10
+
11
+
12
class Resource(Message):
    """Accessor around a capnp ``Resource`` message exposing its ``cpu`` and ``rss`` fields."""

    def __init__(self, msg):
        super().__init__(msg)

    @property
    def cpu(self) -> int:
        """The message's ``cpu`` field."""
        return self._msg.cpu

    @property
    def rss(self) -> int:
        """The message's ``rss`` field."""
        return self._msg.rss

    @staticmethod
    def new_msg(cpu: int, rss: int) -> "Resource":  # type: ignore[override]
        """Build a ``Resource``; the stored cpu value is ``min(cpu, CPU_MAXIMUM)``."""
        capped_cpu = min(cpu, CPU_MAXIMUM)
        capnp_resource = _status.Resource(cpu=capped_cpu, rss=rss)
        return Resource(capnp_resource)

    def get_message(self):
        """Return the wrapped capnp message."""
        return self._msg
30
+
31
+
32
class ObjectManagerStatus(Message):
    """Accessor around a capnp ``ObjectManagerStatus`` message."""

    def __init__(self, msg):
        super().__init__(msg)

    @property
    def number_of_objects(self) -> int:
        """The message's ``numberOfObjects`` field."""
        return self._msg.numberOfObjects

    @staticmethod
    def new_msg(number_of_objects: int) -> "ObjectManagerStatus":  # type: ignore[override]
        """Build an ``ObjectManagerStatus`` carrying ``number_of_objects``."""
        capnp_status = _status.ObjectManagerStatus(numberOfObjects=number_of_objects)
        return ObjectManagerStatus(capnp_status)

    def get_message(self):
        """Return the wrapped capnp message."""
        return self._msg
46
+
47
+
48
class ClientManagerStatus(Message):
    """Accessor around a capnp ``ClientManagerStatus`` message (a list of client / task-count pairs)."""

    def __init__(self, msg):
        super().__init__(msg)

    @property
    def client_to_num_of_tasks(self) -> Dict[ClientID, int]:
        """Mapping built from each pair's ``client`` and ``numTask`` fields.

        NOTE(review): keys are the raw ``client`` values read from the message (``new_msg`` stores
        ``client_id.decode()``), not re-wrapped ``ClientID`` objects -- confirm against callers.
        """
        counts = {}
        for pair in self._msg.clientToNumOfTask:
            client = pair.client
            counts[client] = pair.numTask
        return counts

    @staticmethod
    def new_msg(client_to_num_of_tasks: Dict[ClientID, int]) -> "ClientManagerStatus":  # type: ignore[override]
        """Build a ``ClientManagerStatus`` with one pair per mapping entry; client ids are decoded first."""
        pairs = []
        for client_id, num_tasks in client_to_num_of_tasks.items():
            pairs.append(_status.ClientManagerStatus.Pair(client=client_id.decode(), numTask=num_tasks))

        return ClientManagerStatus(_status.ClientManagerStatus(clientToNumOfTask=pairs))

    def get_message(self):
        """Return the wrapped capnp message."""
        return self._msg
69
+
70
+
71
class TaskManagerStatus(Message):
    """Accessor around a capnp ``TaskManagerStatus`` message (a list of state / count pairs)."""

    # Counts are reduced modulo this value before being written into the message.
    VALUE_SIZE_LIMIT = 2**32

    def __init__(self, msg):
        super().__init__(msg)

    @property
    def state_to_count(self) -> Dict[TaskState, int]:
        """Mapping from ``TaskState`` (built from each pair's ``state``) to the pair's ``count``."""
        counts = {}
        for pair in self._msg.stateToCount:
            state = TaskState(pair.state)
            counts[state] = pair.count
        return counts

    @staticmethod
    def new_msg(state_to_count: Dict[TaskState, int]) -> "TaskManagerStatus":  # type: ignore[override]
        """Build a ``TaskManagerStatus`` with one pair per entry.

        Each state is stored through its enum value and each count is taken modulo ``VALUE_SIZE_LIMIT``.
        """
        pairs = []
        for state, count in state_to_count.items():
            wrapped_count = count % TaskManagerStatus.VALUE_SIZE_LIMIT
            pairs.append(_status.TaskManagerStatus.Pair(state=state.value, count=wrapped_count))

        return TaskManagerStatus(_status.TaskManagerStatus(stateToCount=pairs))

    def get_message(self):
        """Return the wrapped capnp message."""
        return self._msg
94
+
95
+
96
class ProcessorStatus(Message):
    """Accessor around a capnp ``ProcessorStatus`` message."""

    def __init__(self, msg):
        super().__init__(msg)

    @property
    def pid(self) -> int:
        """The message's ``pid`` field."""
        return self._msg.pid

    @property
    def initialized(self) -> int:
        """The message's ``initialized`` field."""
        return self._msg.initialized

    @property
    def has_task(self) -> bool:
        """The message's ``hasTask`` field."""
        return self._msg.hasTask

    @property
    def suspended(self) -> bool:
        """The message's ``suspended`` field."""
        return self._msg.suspended

    @property
    def resource(self) -> Resource:
        """The message's ``resource`` field wrapped in a ``Resource``."""
        capnp_resource = self._msg.resource
        return Resource(capnp_resource)

    @staticmethod
    def new_msg(
        pid: int, initialized: int, has_task: bool, suspended: bool, resource: Resource  # type: ignore[override]
    ) -> "ProcessorStatus":
        """Build a ``ProcessorStatus``; ``resource`` is stored through its underlying capnp message."""
        capnp_status = _status.ProcessorStatus(
            pid=pid,
            initialized=initialized,
            hasTask=has_task,
            suspended=suspended,
            resource=resource.get_message(),
        )
        return ProcessorStatus(capnp_status)

    def get_message(self):
        """Return the wrapped capnp message."""
        return self._msg
132
+
133
+
134
class WorkerStatus(Message):
    """Accessor around a capnp ``WorkerStatus`` message."""

    def __init__(self, msg):
        super().__init__(msg)

    @property
    def worker_id(self) -> WorkerID:
        """The message's ``workerId`` field wrapped in a ``WorkerID``."""
        raw_id = self._msg.workerId
        return WorkerID(raw_id)

    @property
    def agent(self) -> Resource:
        """The message's ``agent`` field wrapped in a ``Resource``."""
        capnp_resource = self._msg.agent
        return Resource(capnp_resource)

    @property
    def rss_free(self) -> int:
        """The message's ``rssFree`` field."""
        return self._msg.rssFree

    @property
    def free(self) -> int:
        """The message's ``free`` field."""
        return self._msg.free

    @property
    def sent(self) -> int:
        """The message's ``sent`` field."""
        return self._msg.sent

    @property
    def queued(self) -> int:
        """The message's ``queued`` field."""
        return self._msg.queued

    @property
    def suspended(self) -> bool:
        """The message's ``suspended`` field.

        NOTE(review): annotated ``bool`` here while ``new_msg`` annotates the same value as ``int`` --
        confirm which matches the schema.
        """
        return self._msg.suspended

    @property
    def lag_us(self) -> int:
        """The message's ``lagUS`` field."""
        return self._msg.lagUS

    @property
    def last_s(self) -> int:
        """The message's ``lastS`` field."""
        return self._msg.lastS

    @property
    def itl(self) -> str:
        """The message's ``itl`` field."""
        return self._msg.itl

    @property
    def processor_statuses(self) -> List[ProcessorStatus]:
        """Every entry of ``processorStatuses`` wrapped in a ``ProcessorStatus``, in message order."""
        statuses = []
        for capnp_status in self._msg.processorStatuses:
            statuses.append(ProcessorStatus(capnp_status))
        return statuses

    @staticmethod
    def new_msg(  # type: ignore[override]
        worker_id: WorkerID,
        agent: Resource,
        rss_free: int,
        free: int,
        sent: int,
        queued: int,
        suspended: int,
        lag_us: int,
        last_s: int,
        itl: str,
        processor_statuses: List[ProcessorStatus],
    ) -> "WorkerStatus":
        """Build a ``WorkerStatus`` from Python-level values.

        ``worker_id`` is stored as ``bytes(worker_id)``; ``agent`` and each processor status are stored
        through their underlying capnp messages.
        """
        capnp_processor_statuses = []
        for status in processor_statuses:
            capnp_processor_statuses.append(status.get_message())

        capnp_status = _status.WorkerStatus(
            workerId=bytes(worker_id),
            agent=agent.get_message(),
            rssFree=rss_free,
            free=free,
            sent=sent,
            queued=queued,
            suspended=suspended,
            lagUS=lag_us,
            lastS=last_s,
            itl=itl,
            processorStatuses=capnp_processor_statuses,
        )
        return WorkerStatus(capnp_status)

    def get_message(self):
        """Return the wrapped capnp message."""
        return self._msg
214
+
215
+
216
class WorkerManagerStatus(Message):
    """Accessor around a capnp ``WorkerManagerStatus`` message (a list of worker statuses)."""

    def __init__(self, msg):
        super().__init__(msg)

    @property
    def workers(self) -> List[WorkerStatus]:
        """Every entry of the message's ``workers`` list wrapped in a ``WorkerStatus``, in message order."""
        wrapped = []
        for capnp_worker in self._msg.workers:
            wrapped.append(WorkerStatus(capnp_worker))
        return wrapped

    @staticmethod
    def new_msg(workers: List[WorkerStatus]) -> "WorkerManagerStatus":  # type: ignore[override]
        """Build a ``WorkerManagerStatus`` from worker statuses, stored through their capnp messages."""
        capnp_workers = []
        for worker in workers:
            capnp_workers.append(worker.get_message())

        return WorkerManagerStatus(_status.WorkerManagerStatus(workers=capnp_workers))

    def get_message(self):
        """Return the wrapped capnp message."""
        return self._msg
230
+
231
+
232
class ScalingManagerStatus(Message):
    """Accessor around a capnp ``ScalingManagerStatus`` message (worker group id / worker ids pairs)."""

    def __init__(self, msg):
        super().__init__(msg)

    @property
    def worker_groups(self) -> Dict[WorkerGroupID, List[WorkerID]]:
        """Mapping from each pair's ``workerGroupID`` to its ``workerIDs``, each wrapped in a ``WorkerID``."""
        groups = {}
        for group in self._msg.workerGroups:
            group_id = group.workerGroupID
            members = []
            for raw_worker_id in group.workerIDs:
                members.append(WorkerID(raw_worker_id))
            groups[group_id] = members
        return groups

    @staticmethod
    def new_msg(worker_groups: Dict[WorkerGroupID, List[WorkerID]]) -> "ScalingManagerStatus":  # type: ignore[override]
        """Build a ``ScalingManagerStatus`` with one pair per group; worker ids are stored as ``bytes``."""
        pairs = []
        for worker_group_id, worker_ids in worker_groups.items():
            raw_worker_ids = [bytes(worker_id) for worker_id in worker_ids]
            pairs.append(_status.ScalingManagerStatus.Pair(workerGroupID=worker_group_id, workerIDs=raw_worker_ids))

        return ScalingManagerStatus(_status.ScalingManagerStatus(workerGroups=pairs))

    def get_message(self):
        """Return the wrapped capnp message."""
        return self._msg
255
+
256
+
257
class BinderStatus(Message):
    """Accessor around a capnp ``BinderStatus`` message (``received`` and ``sent`` client / number pairs)."""

    def __init__(self, msg):
        super().__init__(msg)

    @property
    def received(self) -> Dict[str, int]:
        """Mapping built from each ``received`` pair's ``client`` and ``number`` fields."""
        counters = {}
        for pair in self._msg.received:
            client = pair.client
            counters[client] = pair.number
        return counters

    @property
    def sent(self) -> Dict[str, int]:
        """Mapping built from each ``sent`` pair's ``client`` and ``number`` fields."""
        counters = {}
        for pair in self._msg.sent:
            client = pair.client
            counters[client] = pair.number
        return counters

    @staticmethod
    def new_msg(received: Dict[str, int], sent: Dict[str, int]) -> "BinderStatus":  # type: ignore[override]
        """Build a ``BinderStatus`` with one pair per entry of ``received`` and of ``sent``."""
        received_pairs = []
        for client, number in received.items():
            received_pairs.append(_status.BinderStatus.Pair(client=client, number=number))

        sent_pairs = []
        for client, number in sent.items():
            sent_pairs.append(_status.BinderStatus.Pair(client=client, number=number))

        return BinderStatus(_status.BinderStatus(received=received_pairs, sent=sent_pairs))

    def get_message(self):
        """Return the wrapped capnp message."""
        return self._msg
@@ -0,0 +1,228 @@
1
+ # Custom Worker Implementation
2
+
3
+ ## Overview
4
+
5
+ The existing Python worker implementation, which works with the scheduler and the scaler protocol, is under `scaler.worker`.
6
+
7
+ - workers are connecting to 1 single TCP port of scheduler by using zmq
8
+ - each worker has a fixed length task queue to store the incoming tasks
9
+ - in Task message or `TaskResult` message, it will contain object ids instead of actual data, once worker received one
10
+ task, it should ask scheduler for the actual data by sending `ObjectRequest` message
11
+ - once worker finished the task, it should send `TaskResult` message back to scheduler, the `TaskResult` message should
12
+ have the object id of the result, and worker should send `ObjectInstruction` message to scheduler to store the result
13
+ - each worker has a unique id, set through zmq (see the setup below), by which the scheduler identifies it
14
+ - worker is responsible for sending the heartbeat to scheduler, if scheduler didn't receive heartbeat from worker for
15
+ a period of time, then scheduler will consider the worker is dead and will reallocate all tasks to other workers, also
16
+ heartbeat message will contain the worker's resource usage information and queue capacity and usage status
17
+ - each Task is tied to a source, each source has a dedicated serializer, the source is used to choose the proper
18
+ serializer for deserializing the function and arguments, worker will need to retrieve the serializer bytes using an
19
+ `ObjectRequest` message, and then deserialize the serializer bytes using cloudpickle, the deserialized serializer
20
+ implements the interface `scaler.client.serializer.mixins.Serializer`
21
+ - When worker receives a `TaskCancel` message, it should cancel the task with the given task ID, regardless of the task's status,
22
+ and send a `TaskResult` back to the scheduler
23
+
24
+ - worker can request to scheduler to balance the tasks, scheduler will send the tasks to be given up by the worker (this
25
+ will be replaced by TaskCancel message in the future)
26
+
27
+ ## Setup
28
+
29
+ The zmq `IDENTITY` must be unique. The zmq `SNDHWM` and `RCVHWM` should both be set to 0 to prevent messages from being
30
+ dropped unexpectedly, and it should be DEALER socket type.
31
+
32
+ Every message is a list of bytes frames: the first frame is the message type, and the rest of the frames
33
+ are the message data. One message is the whole list of frames.
34
+
35
+ To send a single message using pyzmq, use the `send_multipart` method:
36
+
37
+ ```python
38
+ # Synchronous API
39
+ socket.send_multipart(frames)
40
+
41
+ # Asynchronous API
42
+ await socket.send_multipart(frames)
43
+ ```
44
+
45
+ Each message may contain multiple zmq frames. To read a single message using pyzmq, use the `recv_multipart` method:
46
+
47
+ ```python
48
+ # Synchronous API
49
+ frames = socket.recv_multipart()
50
+
51
+ # Asynchronous API
52
+ frames = await socket.recv_multipart()
53
+ ```
54
+
55
+ For all of the messages below, please see `scaler.protocol.python.message.py` for the actual message structure.
56
+
57
+ ## Recv messages
58
+
59
+ ### Task `TK`
60
+
61
+ | message_type | task_id | source | metadata | func_object_id | arg 1 type | arg 1 data | (...) | arg N type | arg N data |
62
+ |:------------:|:-------:|:-------:|:--------:|:--------------:|:----------:|:----------:|:-----:|:----------:|:----------:|
63
+ | b"TK" | X bytes | X bytes | X bytes | X bytes | b"R" | X bytes | | b"R" | X bytes |
64
+
65
+ * task_id: Task ID
66
+ * source: Source ID. This is used to choose the proper serializer for deserializing the
67
+ function and arguments. The serializer object ID is md5 hash of `source + b"serializer"` and the serializer object
68
+ bytes data must first be deserialized using cloudpickle. source is essentially the client id, indicating which client
69
+ this task belongs to, please refer to `scaler.utility.object_utility` for `generate_serializer_object_id`
70
+ * metadata: The metadata of the task, can be empty bytes like `b""`
71
+ * func_object_id: Function object ID
72
+ * arg type: Must be type `b"R"` for ObjectID
73
+ * arg data: Object ID
74
+
75
+ After executing `Task`, the worker should send a `TaskResult` message with the final task result.
76
+
77
+ ### TaskCancel `TC`
78
+
79
+ | message_type | task_id |
80
+ |:------------:|:-------:|
81
+ | b"TC" | X bytes |
82
+
83
+ * task_id: Task ID to cancel
84
+
85
+ When a `TaskCancel` message is received, the worker should cancel the task with the given task ID and send a
86
+ `TaskResult`
87
+
88
+ ### ObjectInstruction `OI`
89
+
90
+ | message_type | source | type | num_object_ids | num_object_names | num_object_bytes | object 1 id | (...) | object N id |
91
+ |:------------:|:-------:|:----:|:--------------:|:----------------:|:----------------:|:-----------:|:-----:|:-----------:|
92
+ | b"OI" | X bytes | b"D" | unsigned int | unsigned int | unsigned int | X bytes | | X bytes |
93
+
94
+ * source: Source ID
95
+ * type: Must be `b"D"` for Delete
96
+ * num_object_ids: Number of object IDs
97
+ * num_object_names: Number of object names, value must be zero
98
+ * num_object_bytes: Number of object bytes, value must be zero
99
+ * object id: Object ID
100
+
101
+ When a Delete `ObjectInstruction` message is received, the worker should delete the objects with the given object IDs on
102
+ worker side
103
+
104
+ ### ObjectResponse `OA`
105
+
106
+ | message_type | type | num_object_ids | num_object_names | num_object_bytes | object 1 id | (...) | object N id | object 1 name | (...) | object N name | object 1 bytes | (...) | object N bytes |
107
+ |:------------:|:------:|:--------------:|:----------------:|:----------------:|:-----------:|:-----:|:-----------:|:-------------:|:-----:|:-------------:|:--------------:|:-----:|:--------------:|
108
+ | b"OA" | 1 byte | unsigned int | unsigned int | unsigned int | X bytes | | X bytes | X bytes | | X bytes | X bytes | | X bytes |
109
+
110
+ * type: `b"C"` for object found, `b"N"` for object not found
111
+ * num_object_ids: Number of object IDs
112
+ * num_object_names: Number of object names
113
+ * num_object_bytes: Number of object bytes
114
+ * object id: Object ID
115
+ * object name: Object name
116
+ * object bytes: Object bytes
117
+
118
+ ### BalanceRequest `BQ` (will be replaced by TaskCancel in the future, low priority to implement this)
119
+
120
+ | message_type | num_tasks |
121
+ |:------------:|:------------:|
122
+ | b"BQ" | unsigned int |
123
+
124
+ * num_tasks: Number of tasks to give up
125
+
126
+ When a `BalanceRequest` message is received, the worker should send a `BalanceResponse` message with num_tasks number of
127
+ task IDs.
128
+
129
+ ### WorkerHeartbeatEcho `HE` (optional)
130
+
131
+ | message_type | empty |
132
+ |:------------:|:-------:|
133
+ | b"HE" | 0 bytes |
134
+
135
+ A `WorkerHeartbeatEcho` message indicates that the scheduler has received the worker's `WorkerHeartbeat` message.
136
+
137
+ ### ClientDisconnect `CS` (optional)
138
+
139
+ | message_type | type |
140
+ |:------------:|:----:|
141
+ | b"CS" | b"S" |
142
+
143
+ * type: Must be type `b"S"` for Shutdown.
144
+
145
+ When a Shutdown `ClientDisconnect` message is received, the worker should shut down.
146
+
147
+ ## Send messages
148
+
149
+ ### TaskResult `TR`
150
+
151
+ | message_type | task_id | status | result | metadata |
152
+ |:------------:|:-------:|:------:|:-------:|:--------:|
153
+ | b"TR" | X bytes | 1 byte | X bytes | X bytes |
154
+
155
+ * task_id: Task ID
156
+ * status: `b"S"` for Success, `b"F"` for Failed, `b"C"` for Canceled, `b"K"` for WorkerDied, `b"W"` for NoWorker, `b"I"`
157
+ for Inactive, `b"R"` for Running, and `b"X"` for Canceling
158
+ * result: Task result object ID
159
+ * metadata: Task metadata
160
+
161
+ Worker must submit a Create `ObjectInstruction` message BEFORE returning the task result containing a task result object
162
+ ID.
163
+
164
+ ### ObjectInstruction `OI`
165
+
166
+ | message_type | source | type | num_object_ids | num_object_names | num_object_bytes | object 1 id | (...) | object N id | object 1 name | (...) | object N name | object 1 bytes | (...) | object N bytes |
167
+ |:------------:|:-------:|:----:|:--------------:|:----------------:|:----------------:|:-----------:|:-----:|:-----------:|:-------------:|:-----:|:-------------:|:--------------:|:-----:|:--------------:|
168
+ | b"OI" | X bytes | b"C" | unsigned int | unsigned int | unsigned int | X bytes | | X bytes | X bytes | | X bytes | X bytes | | X bytes |
169
+
170
+ * source: Source ID (this should be the same as the source in the corresponding `Task` message)
171
+ * type: Must be `b"C"` for Create
172
+ * num_object_ids: Number of object IDs
173
+ * num_object_names: Number of object names
174
+ * num_object_bytes: Number of object bytes
175
+ * object id: Object ID (please use `uuid.uuid4().bytes` for object id)
176
+ * object name: Object name
177
+ * object bytes: Object bytes
178
+
179
+ ### ObjectRequest `OR`
180
+
181
+ | message_type | type | object 1 id | (...) | object N id |
182
+ |:------------:|:----:|:-----------:|:-----:|:-----------:|
183
+ | b"OR" | b"A" | X bytes | | X bytes |
184
+
185
+ * type: Must be `b"A"` for Get
186
+ * object id: Object ID
187
+
188
+ When a `Task` message is received, all the functions and arguments are just object IDs, the worker will need to get the object
189
+ content by sending `ObjectRequest` message to the scheduler, the scheduler will reply with `ObjectResponse` message,
190
+ then worker can deserialize the object content and start executing the task
191
+
192
+ ### BalanceResponse `BR`
193
+
194
+ | message_type | task 1 id | (...) | task N id |
195
+ |:------------:|:---------:|:-----:|:---------:|
196
+ | b"BR" | X bytes | | X bytes |
197
+
198
+ * task id: Task ID
199
+
200
+ ### WorkerHeartbeat `HB`
201
+
202
+ | message_type | agent_cpu | agent_rss | worker_cpu | worker_rss | rss_free | queued_tasks | latency_us | initialized | has_task | task_lock |
203
+ |:------------:|:--------------:|:------------------:|:--------------:|:------------------:|:------------------:|:--------------:|:------------:|:-----------:|:--------:|:---------:|
204
+ | b"HB" | unsigned short | unsigned long long | unsigned short | unsigned long long | unsigned long long | unsigned short | unsigned int | _Bool | _Bool | _Bool |
205
+
206
+ * agent_cpu: Agent CPU usage
207
+ * agent_rss: Agent resident set size in bytes
208
+ * worker_cpu: Worker CPU usage
209
+ * worker_rss: Worker resident set size in bytes
210
+ * rss_free: Free memory in bytes
211
+ * queued_tasks: Number of queued tasks
212
+ * latency_us: Latency in microseconds
213
+ * initialized: Worker initialized
214
+ * has_task: Worker has task
215
+ * task_lock: Worker task lock
216
+
217
+ Worker must send a `WorkerHeartbeat` message every heartbeat interval (at least 1 second) or else the scheduler will
218
+ consider the worker dead.
219
+
220
+ ### DisconnectRequest `DR` (optional)
221
+
222
+ | message_type | worker |
223
+ |:------------:|:-------:|
224
+ | b"DR" | X bytes |
225
+
226
+ * worker: Worker ID
227
+
228
+ When a `DisconnectRequest` message is sent, the worker should disconnect from the scheduler.
File without changes
File without changes
@@ -0,0 +1,9 @@
1
+ import enum
2
+
3
+ from scaler.scheduler.allocate_policy.capability_allocate_policy import CapabilityAllocatePolicy
4
+ from scaler.scheduler.allocate_policy.even_load_allocate_policy import EvenLoadAllocatePolicy
5
+
6
+
7
class AllocatePolicy(enum.Enum):
    """Selectable allocation policies; each member's value is the class implementing that policy."""

    # Member order is part of the enum's iteration order, so it is kept as declared.
    even = EvenLoadAllocatePolicy
    capability = CapabilityAllocatePolicy