opengris-scaler 1.12.37__cp38-cp38-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opengris_scaler-1.12.37.dist-info/METADATA +730 -0
- opengris_scaler-1.12.37.dist-info/RECORD +196 -0
- opengris_scaler-1.12.37.dist-info/WHEEL +5 -0
- opengris_scaler-1.12.37.dist-info/entry_points.txt +10 -0
- opengris_scaler-1.12.37.dist-info/licenses/LICENSE +201 -0
- opengris_scaler-1.12.37.dist-info/licenses/LICENSE.spdx +7 -0
- opengris_scaler-1.12.37.dist-info/licenses/NOTICE +8 -0
- opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
- opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
- opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
- opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
- scaler/__init__.py +14 -0
- scaler/about.py +5 -0
- scaler/client/__init__.py +0 -0
- scaler/client/agent/__init__.py +0 -0
- scaler/client/agent/client_agent.py +218 -0
- scaler/client/agent/disconnect_manager.py +27 -0
- scaler/client/agent/future_manager.py +112 -0
- scaler/client/agent/heartbeat_manager.py +74 -0
- scaler/client/agent/mixins.py +89 -0
- scaler/client/agent/object_manager.py +98 -0
- scaler/client/agent/task_manager.py +64 -0
- scaler/client/client.py +672 -0
- scaler/client/future.py +252 -0
- scaler/client/object_buffer.py +129 -0
- scaler/client/object_reference.py +25 -0
- scaler/client/serializer/__init__.py +0 -0
- scaler/client/serializer/default.py +16 -0
- scaler/client/serializer/mixins.py +38 -0
- scaler/cluster/__init__.py +0 -0
- scaler/cluster/cluster.py +95 -0
- scaler/cluster/combo.py +157 -0
- scaler/cluster/object_storage_server.py +45 -0
- scaler/cluster/scheduler.py +86 -0
- scaler/config/__init__.py +0 -0
- scaler/config/common/__init__.py +0 -0
- scaler/config/common/logging.py +41 -0
- scaler/config/common/web.py +18 -0
- scaler/config/common/worker.py +65 -0
- scaler/config/common/worker_adapter.py +28 -0
- scaler/config/config_class.py +317 -0
- scaler/config/defaults.py +94 -0
- scaler/config/mixins.py +20 -0
- scaler/config/section/__init__.py +0 -0
- scaler/config/section/cluster.py +66 -0
- scaler/config/section/ecs_worker_adapter.py +78 -0
- scaler/config/section/native_worker_adapter.py +30 -0
- scaler/config/section/object_storage_server.py +13 -0
- scaler/config/section/scheduler.py +126 -0
- scaler/config/section/symphony_worker_adapter.py +35 -0
- scaler/config/section/top.py +16 -0
- scaler/config/section/webui.py +16 -0
- scaler/config/types/__init__.py +0 -0
- scaler/config/types/network_backend.py +12 -0
- scaler/config/types/object_storage_server.py +45 -0
- scaler/config/types/worker.py +67 -0
- scaler/config/types/zmq.py +83 -0
- scaler/entry_points/__init__.py +0 -0
- scaler/entry_points/cluster.py +10 -0
- scaler/entry_points/object_storage_server.py +26 -0
- scaler/entry_points/scheduler.py +51 -0
- scaler/entry_points/top.py +272 -0
- scaler/entry_points/webui.py +6 -0
- scaler/entry_points/worker_adapter_ecs.py +22 -0
- scaler/entry_points/worker_adapter_native.py +31 -0
- scaler/entry_points/worker_adapter_symphony.py +26 -0
- scaler/io/__init__.py +0 -0
- scaler/io/async_binder.py +89 -0
- scaler/io/async_connector.py +95 -0
- scaler/io/async_object_storage_connector.py +225 -0
- scaler/io/mixins.py +154 -0
- scaler/io/sync_connector.py +68 -0
- scaler/io/sync_object_storage_connector.py +249 -0
- scaler/io/sync_subscriber.py +83 -0
- scaler/io/utility.py +80 -0
- scaler/io/ymq/__init__.py +0 -0
- scaler/io/ymq/_ymq.pyi +95 -0
- scaler/io/ymq/_ymq.so +0 -0
- scaler/io/ymq/ymq.py +138 -0
- scaler/io/ymq_async_object_storage_connector.py +184 -0
- scaler/io/ymq_sync_object_storage_connector.py +184 -0
- scaler/object_storage/__init__.py +0 -0
- scaler/object_storage/object_storage_server.so +0 -0
- scaler/protocol/__init__.py +0 -0
- scaler/protocol/capnp/__init__.py +0 -0
- scaler/protocol/capnp/_python.py +6 -0
- scaler/protocol/capnp/common.capnp +68 -0
- scaler/protocol/capnp/message.capnp +218 -0
- scaler/protocol/capnp/object_storage.capnp +57 -0
- scaler/protocol/capnp/status.capnp +73 -0
- scaler/protocol/introduction.md +105 -0
- scaler/protocol/python/__init__.py +0 -0
- scaler/protocol/python/common.py +140 -0
- scaler/protocol/python/message.py +751 -0
- scaler/protocol/python/mixins.py +13 -0
- scaler/protocol/python/object_storage.py +118 -0
- scaler/protocol/python/status.py +279 -0
- scaler/protocol/worker.md +228 -0
- scaler/scheduler/__init__.py +0 -0
- scaler/scheduler/allocate_policy/__init__.py +0 -0
- scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
- scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
- scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
- scaler/scheduler/allocate_policy/mixins.py +55 -0
- scaler/scheduler/controllers/__init__.py +0 -0
- scaler/scheduler/controllers/balance_controller.py +65 -0
- scaler/scheduler/controllers/client_controller.py +131 -0
- scaler/scheduler/controllers/config_controller.py +31 -0
- scaler/scheduler/controllers/graph_controller.py +424 -0
- scaler/scheduler/controllers/information_controller.py +81 -0
- scaler/scheduler/controllers/mixins.py +194 -0
- scaler/scheduler/controllers/object_controller.py +147 -0
- scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
- scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
- scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
- scaler/scheduler/controllers/scaling_policies/null.py +14 -0
- scaler/scheduler/controllers/scaling_policies/types.py +9 -0
- scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
- scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
- scaler/scheduler/controllers/task_controller.py +376 -0
- scaler/scheduler/controllers/worker_controller.py +169 -0
- scaler/scheduler/object_usage/__init__.py +0 -0
- scaler/scheduler/object_usage/object_tracker.py +131 -0
- scaler/scheduler/scheduler.py +251 -0
- scaler/scheduler/task/__init__.py +0 -0
- scaler/scheduler/task/task_state_machine.py +92 -0
- scaler/scheduler/task/task_state_manager.py +61 -0
- scaler/ui/__init__.py +0 -0
- scaler/ui/common/__init__.py +0 -0
- scaler/ui/common/constants.py +9 -0
- scaler/ui/common/live_display.py +147 -0
- scaler/ui/common/memory_window.py +146 -0
- scaler/ui/common/setting_page.py +40 -0
- scaler/ui/common/task_graph.py +840 -0
- scaler/ui/common/task_log.py +111 -0
- scaler/ui/common/utility.py +66 -0
- scaler/ui/common/webui.py +80 -0
- scaler/ui/common/worker_processors.py +104 -0
- scaler/ui/v1.py +76 -0
- scaler/ui/v2.py +102 -0
- scaler/ui/webui.py +21 -0
- scaler/utility/__init__.py +0 -0
- scaler/utility/debug.py +19 -0
- scaler/utility/event_list.py +63 -0
- scaler/utility/event_loop.py +58 -0
- scaler/utility/exceptions.py +42 -0
- scaler/utility/formatter.py +44 -0
- scaler/utility/graph/__init__.py +0 -0
- scaler/utility/graph/optimization.py +27 -0
- scaler/utility/graph/topological_sorter.py +11 -0
- scaler/utility/graph/topological_sorter_graphblas.py +174 -0
- scaler/utility/identifiers.py +107 -0
- scaler/utility/logging/__init__.py +0 -0
- scaler/utility/logging/decorators.py +25 -0
- scaler/utility/logging/scoped_logger.py +33 -0
- scaler/utility/logging/utility.py +183 -0
- scaler/utility/many_to_many_dict.py +123 -0
- scaler/utility/metadata/__init__.py +0 -0
- scaler/utility/metadata/profile_result.py +31 -0
- scaler/utility/metadata/task_flags.py +30 -0
- scaler/utility/mixins.py +13 -0
- scaler/utility/network_util.py +7 -0
- scaler/utility/one_to_many_dict.py +72 -0
- scaler/utility/queues/__init__.py +0 -0
- scaler/utility/queues/async_indexed_queue.py +37 -0
- scaler/utility/queues/async_priority_queue.py +70 -0
- scaler/utility/queues/async_sorted_priority_queue.py +45 -0
- scaler/utility/queues/indexed_queue.py +114 -0
- scaler/utility/serialization.py +9 -0
- scaler/version.txt +1 -0
- scaler/worker/__init__.py +0 -0
- scaler/worker/agent/__init__.py +0 -0
- scaler/worker/agent/heartbeat_manager.py +110 -0
- scaler/worker/agent/mixins.py +137 -0
- scaler/worker/agent/processor/__init__.py +0 -0
- scaler/worker/agent/processor/object_cache.py +107 -0
- scaler/worker/agent/processor/processor.py +285 -0
- scaler/worker/agent/processor/streaming_buffer.py +28 -0
- scaler/worker/agent/processor_holder.py +147 -0
- scaler/worker/agent/processor_manager.py +369 -0
- scaler/worker/agent/profiling_manager.py +109 -0
- scaler/worker/agent/task_manager.py +150 -0
- scaler/worker/agent/timeout_manager.py +19 -0
- scaler/worker/preload.py +84 -0
- scaler/worker/worker.py +265 -0
- scaler/worker_adapter/__init__.py +0 -0
- scaler/worker_adapter/common.py +26 -0
- scaler/worker_adapter/ecs.py +241 -0
- scaler/worker_adapter/native.py +138 -0
- scaler/worker_adapter/symphony/__init__.py +0 -0
- scaler/worker_adapter/symphony/callback.py +45 -0
- scaler/worker_adapter/symphony/heartbeat_manager.py +82 -0
- scaler/worker_adapter/symphony/message.py +24 -0
- scaler/worker_adapter/symphony/task_manager.py +289 -0
- scaler/worker_adapter/symphony/worker.py +204 -0
- scaler/worker_adapter/symphony/worker_adapter.py +123 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import socket
|
|
2
|
+
import uuid
|
|
3
|
+
from threading import Lock
|
|
4
|
+
from typing import Iterable, Optional
|
|
5
|
+
|
|
6
|
+
from scaler.io.mixins import SyncObjectStorageConnector
|
|
7
|
+
from scaler.io.ymq.ymq import IOContext, IOSocket, IOSocketType, Message, YMQException
|
|
8
|
+
from scaler.protocol.capnp._python import _object_storage # noqa
|
|
9
|
+
from scaler.protocol.python.object_storage import ObjectRequestHeader, ObjectResponseHeader, to_capnp_object_id
|
|
10
|
+
from scaler.utility.exceptions import ObjectStorageException
|
|
11
|
+
from scaler.utility.identifiers import ObjectID
|
|
12
|
+
|
|
13
|
+
# Some OSes raise an OSError when send() or sendmsg() is called with a buffer that is too large.
|
|
14
|
+
MAX_CHUNK_SIZE = 128 * 1024 * 1024
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class PyYMQSyncObjectStorageConnector(SyncObjectStorageConnector):
    """A synchronous connector that talks to a Scaler object storage instance through a YMQ ``IOSocket``.

    Every public operation sends one request and then waits for its response while holding an internal lock, so
    requests issued through the same connector never interleave.
    """

    def __init__(self, host: str, port: int):
        """Create the YMQ socket and connect it to ``tcp://host:port``."""
        self._host = host
        self._port = port

        self._next_request_id = 0

        self._socket_lock = Lock()

        # Assigned before any call that can raise, so that __del__()/destroy() stay safe if connecting fails.
        self._io_socket: Optional[IOSocket] = None

        self._identity: str = f"{self.__class__.__name__}|{socket.gethostname().split('.')[0]}|{uuid.uuid4()}"
        self._io_context = IOContext()
        self._io_socket = self._io_context.createIOSocket_sync(self._identity, IOSocketType.Connector)
        self._io_socket.connect_sync(self.address)

    def __del__(self):
        # __init__() may never have run far enough to create the lock; there is nothing to release then.
        if getattr(self, "_socket_lock", None) is not None:
            self.destroy()

    def destroy(self):
        """Drop the reference to the underlying socket.

        Requests issued afterwards raise ``ObjectStorageException``. Calling this more than once is harmless.
        """
        with self._socket_lock:
            self._io_socket = None

    @property
    def address(self) -> str:
        """The ``tcp://host:port`` address this connector was created for."""
        return f"tcp://{self._host}:{self._port}"

    def set_object(self, object_id: ObjectID, payload: bytes):
        """
        Sets the object's payload on the object storage server.

        Raises ``RuntimeError`` if the server answers with anything other than an empty ``SetOK`` response.
        """

        with self._socket_lock:
            self.__send_request(object_id, len(payload), ObjectRequestHeader.ObjectRequestType.SetObject, payload)
            response_header, response_payload = self.__receive_response()

        self.__ensure_response_type(response_header, [ObjectResponseHeader.ObjectResponseType.SetOK])
        self.__ensure_empty_payload(response_payload)

    def get_object(self, object_id: ObjectID, max_payload_length: int = 2**64 - 1) -> bytearray:
        """
        Returns the object's payload from the object storage server.

        Will block until the object is available.

        ``max_payload_length`` is sent to the server as the request header's payload length.
        """

        with self._socket_lock:
            self.__send_request(object_id, max_payload_length, ObjectRequestHeader.ObjectRequestType.GetObject)
            response_header, response_payload = self.__receive_response()

        self.__ensure_response_type(response_header, [ObjectResponseHeader.ObjectResponseType.GetOK])

        return response_payload

    def delete_object(self, object_id: ObjectID) -> bool:
        """
        Removes the object from the object storage server.

        Returns `False` if the object wasn't found in the server. Otherwise returns `True`.
        """

        with self._socket_lock:
            self.__send_request(object_id, 0, ObjectRequestHeader.ObjectRequestType.DeleteObject)
            response_header, response_payload = self.__receive_response()

        self.__ensure_response_type(
            response_header,
            [ObjectResponseHeader.ObjectResponseType.DelOK, ObjectResponseHeader.ObjectResponseType.DelNotExists],
        )
        self.__ensure_empty_payload(response_payload)

        return response_header.response_type == ObjectResponseHeader.ObjectResponseType.DelOK

    def duplicate_object_id(self, object_id: ObjectID, new_object_id: ObjectID) -> None:
        """
        Link an object's content to a new object ID on the object storage server.

        The request is addressed to ``new_object_id``; the serialized ``object_id`` travels as the payload.
        """

        object_id_payload = to_capnp_object_id(object_id).to_bytes()

        with self._socket_lock:
            self.__send_request(
                new_object_id,
                len(object_id_payload),
                ObjectRequestHeader.ObjectRequestType.DuplicateObjectID,
                object_id_payload,
            )
            response_header, response_payload = self.__receive_response()

        self.__ensure_response_type(response_header, [ObjectResponseHeader.ObjectResponseType.DuplicateOK])
        self.__ensure_empty_payload(response_payload)

    def __ensure_is_connected(self):
        """Raise ``ObjectStorageException`` once ``destroy()`` has dropped the socket."""
        if self._io_socket is None:
            raise ObjectStorageException("connector is closed.")

    def __ensure_response_type(
        self, header: ObjectResponseHeader, valid_response_types: Iterable[ObjectResponseHeader.ObjectResponseType]
    ):
        """Raise ``RuntimeError`` unless the response type is one of ``valid_response_types``."""
        if header.response_type not in valid_response_types:
            raise RuntimeError(f"unexpected object storage response_type={header.response_type}.")

    def __ensure_empty_payload(self, payload: bytearray):
        """Raise ``RuntimeError`` if a response that should carry no payload carries one."""
        if len(payload) != 0:
            raise RuntimeError(f"unexpected response payload_length={len(payload)}, expected 0.")

    def __send_request(
        self,
        object_id: ObjectID,
        payload_length: int,
        request_type: ObjectRequestHeader.ObjectRequestType,
        payload: Optional[bytes] = None,
    ):
        """Send the request header and, when given, the payload as a second message.

        Must be called with ``_socket_lock`` held. Raises ``ObjectStorageException`` if the connector was destroyed.
        """
        self.__ensure_is_connected()
        assert self._io_socket is not None  # narrows the Optional for type checkers

        request_id = self._next_request_id
        # Wrap around so the next ID always stays within the unsigned 64-bit range.
        self._next_request_id = (self._next_request_id + 1) % (2**64 - 1)

        header = ObjectRequestHeader.new_msg(object_id, payload_length, request_id, request_type)
        header_bytes = header.get_message().to_bytes()

        # The header always goes first; the payload, if any, follows as its own message.
        self._io_socket.send_sync(Message(address=None, payload=header_bytes))
        if payload is not None:
            self._io_socket.send_sync(Message(address=None, payload=payload))

    def __receive_response(self):
        """Read one response and return it as ``(header, payload)``.

        A ``YMQException`` raised while reading is reported as ``ObjectStorageException``.
        """
        assert self._io_socket is not None

        try:
            header = self.__read_response_header()
            payload = self.__read_response_payload(header)
            return header, payload
        except YMQException:
            self.__raise_connection_failure()

    def __read_response_header(self) -> ObjectResponseHeader:
        """Receive one message and parse it as an ``ObjectResponseHeader``."""
        assert self._io_socket is not None

        header_bytes = self._io_socket.recv_sync().payload.data
        if header_bytes is None:
            self.__raise_connection_failure()

        # NOTE: pycapnp has had trouble reading from a bytearray object, see
        # https://github.com/capnproto/pycapnp/issues/153. The received data is handed to from_bytes() as-is.
        with _object_storage.ObjectResponseHeader.from_bytes(header_bytes) as header_message:
            return ObjectResponseHeader(header_message)

    def __read_response_payload(self, header: ObjectResponseHeader) -> bytearray:
        """Receive the payload announced by ``header``, or return an empty ``bytearray`` if it announces none.

        Raises ``RuntimeError`` if the received payload size differs from ``header.payload_length``.
        """
        if header.payload_length <= 0:
            return bytearray()

        assert self._io_socket is not None

        res = self._io_socket.recv_sync().payload.data
        if res is None:
            self.__raise_connection_failure()

        # Explicit check instead of `assert`, which would be stripped when Python runs with -O.
        if len(res) != header.payload_length:
            raise RuntimeError(f"unexpected response payload_length={len(res)}, expected {header.payload_length}.")

        return bytearray(res)

    @staticmethod
    def __raise_connection_failure():
        """Raise the ``ObjectStorageException`` used for every connection-level failure."""
        raise ObjectStorageException("connection failure to object storage server.")
|
|
File without changes
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import capnp # noqa
|
|
2
|
+
|
|
3
|
+
import scaler.protocol.capnp.common_capnp as _common # noqa
|
|
4
|
+
import scaler.protocol.capnp.message_capnp as _message # noqa
|
|
5
|
+
import scaler.protocol.capnp.object_storage_capnp as _object_storage # noqa
|
|
6
|
+
import scaler.protocol.capnp.status_capnp as _status # noqa
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
@0xf57f79ac88fab620;

enum TaskResultType {
  # Outcome reported for a submitted task.

  success @0;           # the task finished and its result is available
  failed @1;            # the task failed on the worker
  failedWorkerDied @2;  # the worker died (only happens when the scheduler runs with keep_task=False)
}

enum TaskCancelConfirmType {
  # Outcome of a task cancel request.

  canceled @0;        # the cancel succeeded
  cancelFailed @1;    # the cancel failed; this may happen if the task is already being processed
  cancelNotFound @2;  # no such task could be found
}

enum TaskTransition {
  # Task transition identifiers.

  hasCapacity @0;
  taskResultSuccess @1;
  taskResultFailed @2;
  taskResultWorkerDied @3;
  taskCancel @4;
  taskCancelConfirmCanceled @5;
  taskCancelConfirmFailed @6;
  taskCancelConfirmNotFound @7;
  balanceTaskCancel @8;
  workerDisconnect @9;
  schedulerHasTask @10;
  schedulerHasNoTask @11;
}

enum TaskState {
  # Task state identifiers.

  inactive @0;
  running @1;
  canceling @2;
  balanceCanceling @3;
  success @4;
  failed @5;
  failedWorkerDied @6;
  canceled @7;
  canceledNotFound @8;
  balanceCanceled @9;
  workerDisconnecting @10;
}

enum WorkerState {
  # Worker connection state identifiers.

  connected @0;
  disconnected @1;
}

struct TaskCapability {
  # A named capability, either provided by a worker or required by a task.

  name @0 :Text;    # name of the capability provided by the worker / required by the task (e.g. "gpu" or "linux")
  value @1 :Int64;  # quantity of the capability provided / required; use -1 for quantity-less capabilities
}

struct ObjectMetadata {
  # Three lists describing objects: their IDs, content types and names.

  objectIds @0 :List(Data);
  objectTypes @1 :List(ObjectContentType);
  objectNames @2 :List(Data);

  enum ObjectContentType {
    serializer @0;
    object @1;
  }
}

struct ObjectStorageAddress {
  # Host and port of an object storage server.

  host @0 :Text;
  port @1 :UInt16;
}
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
@0xaf44f44ea94a4675;

using CommonType = import "common.capnp";
using Status = import "status.capnp";

struct Task {
  # A single task: its identifiers, the function object to call, its arguments and required capabilities.

  taskId @0 :Data;
  source @1 :Data;
  metadata @2 :Data;
  funcObjectId @3 :Data;
  functionArgs @4 :List(Argument);
  capabilities @5 :List(CommonType.TaskCapability);

  struct Argument {
    # One function argument; `type` tells how `data` is to be interpreted.

    type @0 :ArgumentType;
    data @1 :Data;

    enum ArgumentType {
      task @0;
      objectID @1;
    }
  }
}

struct TaskCancel {
  # Request to cancel the task identified by `taskId`.

  struct TaskCancelFlags {
    force @0 :Bool;
  }

  taskId @0 :Data;
  flags @1 :TaskCancelFlags;
}

struct TaskLog {
  # A piece of stdout/stderr text attached to a task.

  taskId @0 :Data;
  logType @1 :LogType;
  content @2 :Text;

  enum LogType {
    stdout @0;
    stderr @1;
  }
}

struct TaskResult {
  # Result of a task, with its result type, metadata and result data.

  taskId @0 :Data;
  resultType @1 :CommonType.TaskResultType;
  metadata @2 :Data;
  results @3 :List(Data);
}

struct TaskCancelConfirm {
  # Answer to a TaskCancel, carrying the cancel outcome.

  taskId @0 :Data;
  cancelConfirmType @1 :CommonType.TaskCancelConfirmType;
}

struct GraphTask {
  # A graph of tasks submitted as one unit, together with its target IDs.

  taskId @0 :Data;
  source @1 :Data;
  targets @2 :List(Data);
  graph @3 :List(Task);
}

struct ClientHeartbeat {
  resource @0 :Status.Resource;
  latencyUS @1 :UInt32;
}

struct ClientHeartbeatEcho {
  objectStorageAddress @0 :CommonType.ObjectStorageAddress;
}

struct WorkerHeartbeat {
  agent @0 :Status.Resource;
  rssFree @1 :UInt64;
  queueSize @2 :UInt32;
  queuedTasks @3 :UInt32;
  latencyUS @4 :UInt32;
  taskLock @5 :Bool;
  processors @6 :List(Status.ProcessorStatus);
  capabilities @7 :List(CommonType.TaskCapability);
}

struct WorkerHeartbeatEcho {
  objectStorageAddress @0 :CommonType.ObjectStorageAddress;
}

struct ObjectInstruction {
  # An instruction (create / delete / clear) concerning the objects described by `objectMetadata`.

  instructionType @0 :ObjectInstructionType;
  objectUser @1 :Data;
  objectMetadata @2 :CommonType.ObjectMetadata;

  enum ObjectInstructionType {
    create @0;
    delete @1;
    clear @2;
  }
}

struct DisconnectRequest {
  worker @0 :Data;
}

struct DisconnectResponse {
  worker @0 :Data;
}

struct ClientDisconnect {
  disconnectType @0 :DisconnectType;

  enum DisconnectType {
    disconnect @0;
    shutdown @1;
  }
}

struct ClientShutdownResponse {
  accepted @0 :Bool;
}

struct StateClient {
}

struct StateObject {
}

struct StateBalanceAdvice {
  workerId @0 :Data;
  taskIds @1 :List(Data);
}

struct StateScheduler {
  # Status snapshot made of the scheduler's resource usage and the status of each of its managers.

  binder @0 :Status.BinderStatus;
  scheduler @1 :Status.Resource;
  rssFree @2 :UInt64;
  clientManager @3 :Status.ClientManagerStatus;
  objectManager @4 :Status.ObjectManagerStatus;
  taskManager @5 :Status.TaskManagerStatus;
  workerManager @6 :Status.WorkerManagerStatus;
  scalingManager @7 :Status.ScalingManagerStatus;
}

struct StateWorker {
  workerId @0 :Data;
  state @1 :CommonType.WorkerState;
  capabilities @2 :List(CommonType.TaskCapability);
}

struct StateTask {
  taskId @0 :Data;
  functionName @1 :Data;
  state @2 :CommonType.TaskState;
  worker @3 :Data;
  capabilities @4 :List(CommonType.TaskCapability);
  metadata @5 :Data;
}

struct StateGraphTask {
  enum NodeTaskType {
    normal @0;
    target @1;
  }

  graphTaskId @0 :Data;
  taskId @1 :Data;
  nodeTaskType @2 :NodeTaskType;
  parentTaskIds @3 :List(Data);
}

struct ProcessorInitialized {
}

struct InformationRequest {
  request @0 :Data;
}

struct InformationResponse {
  response @0 :Data;
}

struct Message {
  # Envelope holding exactly one of the message structs declared above.

  union {
    task @0 :Task;
    taskCancel @1 :TaskCancel;
    taskCancelConfirm @2 :TaskCancelConfirm;
    taskResult @3 :TaskResult;
    taskLog @4 :TaskLog;

    graphTask @5 :GraphTask;

    objectInstruction @6 :ObjectInstruction;

    clientHeartbeat @7 :ClientHeartbeat;
    clientHeartbeatEcho @8 :ClientHeartbeatEcho;

    workerHeartbeat @9 :WorkerHeartbeat;
    workerHeartbeatEcho @10 :WorkerHeartbeatEcho;

    disconnectRequest @11 :DisconnectRequest;
    disconnectResponse @12 :DisconnectResponse;

    stateClient @13 :StateClient;
    stateObject @14 :StateObject;
    stateBalanceAdvice @15 :StateBalanceAdvice;
    stateScheduler @16 :StateScheduler;
    stateWorker @17 :StateWorker;
    stateTask @18 :StateTask;
    stateGraphTask @19 :StateGraphTask;

    clientDisconnect @20 :ClientDisconnect;
    clientShutdownResponse @21 :ClientShutdownResponse;

    processorInitialized @22 :ProcessorInitialized;

    informationRequest @23 :InformationRequest;
    informationResponse @24 :InformationResponse;
  }
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
@0xc2a14174aa42a12a;

using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("scaler::protocol");

struct ObjectRequestHeader {
  # Header of a request sent to the object storage server.

  objectID @0 :ObjectID;              # 32 bytes
  payloadLength @1 :UInt64;           # 8 bytes
  requestID @2 :UInt64;               # 8 bytes
  requestType @3 :ObjectRequestType;  # 2 bytes

  enum ObjectRequestType {
    # Sets the object to the message's payload, overriding the object's content if it already exists.
    # Is always answered immediately with a setOK message.
    setObject @0;

    # Gets an object's content.
    # If the object does not exist, the getOK response is delayed until the object is created.
    getObject @1;

    # Removes the object.
    deleteObject @2;

    # Creates the provided object ID by linking it to the content of the object ID provided in the payload.
    # Overrides the object content if the new object ID already exists.
    # If the referenced object does not exist, the duplicateOK response is delayed until the original object is
    # created.
    duplicateObjectID @3;

    # Asks the server for internal information; the result is returned as payload.
    # Schema: a tuple of three uint64_t (number of IDs, number of objects (hashes), total actual object size in
    # bytes).
    infoGetTotal @4;
  }
}

struct ObjectID {
  # An object identifier stored as four 64-bit words.

  field0 @0 :UInt64;
  field1 @1 :UInt64;
  field2 @2 :UInt64;
  field3 @3 :UInt64;
}

struct ObjectResponseHeader {
  # Header of a response sent back by the object storage server.

  objectID @0 :ObjectID;
  payloadLength @1 :UInt64;
  responseID @2 :UInt64;  # 8 bytes
  responseType @3 :ObjectResponseType;

  enum ObjectResponseType {
    setOK @0;
    getOK @1;
    delOK @2;
    delNotExists @3;
    duplicateOK @4;
    infoGetTotalOK @5;
  }
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
@0xa4dfa1212ad2d0f0;

struct Resource {
  # CPU and resident-memory usage figures.

  cpu @0 :UInt16;  # percentage scaled by 10: 99.2% is stored as the integer 992
  rss @1 :UInt64;  # 64-bit because a 32-bit value would cap at 4GB
}

struct ObjectManagerStatus {
  numberOfObjects @0 :UInt32;
}

struct ClientManagerStatus {
  clientToNumOfTask @0 :List(Pair);

  struct Pair {
    client @0 :Data;
    numTask @1 :UInt32;
  }
}

struct TaskManagerStatus {
  stateToCount @0 :List(Pair);

  struct Pair {
    state @0 :UInt8;
    count @1 :UInt32;
  }
}

struct ProcessorStatus {
  pid @0 :UInt32;
  initialized @1 :Bool;
  hasTask @2 :Bool;
  suspended @3 :Bool;
  resource @4 :Resource;
}

struct WorkerStatus {
  workerId @0 :Data;
  agent @1 :Resource;
  rssFree @2 :UInt64;
  free @3 :UInt32;
  sent @4 :UInt32;
  queued @5 :UInt32;
  suspended @6 :UInt8;
  lagUS @7 :UInt64;
  lastS @8 :UInt8;
  itl @9 :Text;
  processorStatuses @10 :List(ProcessorStatus);
}

struct WorkerManagerStatus {
  workers @0 :List(WorkerStatus);
}

struct ScalingManagerStatus {
  workerGroups @0 :List(Pair);

  struct Pair {
    workerGroupID @0 :Data;
    workerIDs @1 :List(Data);
  }
}

struct BinderStatus {
  received @0 :List(Pair);
  sent @1 :List(Pair);

  struct Pair {
    client @0 :Text;
    number @1 :UInt32;
  }
}
|