opengris-scaler 1.12.37__cp38-cp38-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. opengris_scaler-1.12.37.dist-info/METADATA +730 -0
  2. opengris_scaler-1.12.37.dist-info/RECORD +196 -0
  3. opengris_scaler-1.12.37.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.37.dist-info/entry_points.txt +10 -0
  5. opengris_scaler-1.12.37.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.37.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.37.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/__init__.py +14 -0
  13. scaler/about.py +5 -0
  14. scaler/client/__init__.py +0 -0
  15. scaler/client/agent/__init__.py +0 -0
  16. scaler/client/agent/client_agent.py +218 -0
  17. scaler/client/agent/disconnect_manager.py +27 -0
  18. scaler/client/agent/future_manager.py +112 -0
  19. scaler/client/agent/heartbeat_manager.py +74 -0
  20. scaler/client/agent/mixins.py +89 -0
  21. scaler/client/agent/object_manager.py +98 -0
  22. scaler/client/agent/task_manager.py +64 -0
  23. scaler/client/client.py +672 -0
  24. scaler/client/future.py +252 -0
  25. scaler/client/object_buffer.py +129 -0
  26. scaler/client/object_reference.py +25 -0
  27. scaler/client/serializer/__init__.py +0 -0
  28. scaler/client/serializer/default.py +16 -0
  29. scaler/client/serializer/mixins.py +38 -0
  30. scaler/cluster/__init__.py +0 -0
  31. scaler/cluster/cluster.py +95 -0
  32. scaler/cluster/combo.py +157 -0
  33. scaler/cluster/object_storage_server.py +45 -0
  34. scaler/cluster/scheduler.py +86 -0
  35. scaler/config/__init__.py +0 -0
  36. scaler/config/common/__init__.py +0 -0
  37. scaler/config/common/logging.py +41 -0
  38. scaler/config/common/web.py +18 -0
  39. scaler/config/common/worker.py +65 -0
  40. scaler/config/common/worker_adapter.py +28 -0
  41. scaler/config/config_class.py +317 -0
  42. scaler/config/defaults.py +94 -0
  43. scaler/config/mixins.py +20 -0
  44. scaler/config/section/__init__.py +0 -0
  45. scaler/config/section/cluster.py +66 -0
  46. scaler/config/section/ecs_worker_adapter.py +78 -0
  47. scaler/config/section/native_worker_adapter.py +30 -0
  48. scaler/config/section/object_storage_server.py +13 -0
  49. scaler/config/section/scheduler.py +126 -0
  50. scaler/config/section/symphony_worker_adapter.py +35 -0
  51. scaler/config/section/top.py +16 -0
  52. scaler/config/section/webui.py +16 -0
  53. scaler/config/types/__init__.py +0 -0
  54. scaler/config/types/network_backend.py +12 -0
  55. scaler/config/types/object_storage_server.py +45 -0
  56. scaler/config/types/worker.py +67 -0
  57. scaler/config/types/zmq.py +83 -0
  58. scaler/entry_points/__init__.py +0 -0
  59. scaler/entry_points/cluster.py +10 -0
  60. scaler/entry_points/object_storage_server.py +26 -0
  61. scaler/entry_points/scheduler.py +51 -0
  62. scaler/entry_points/top.py +272 -0
  63. scaler/entry_points/webui.py +6 -0
  64. scaler/entry_points/worker_adapter_ecs.py +22 -0
  65. scaler/entry_points/worker_adapter_native.py +31 -0
  66. scaler/entry_points/worker_adapter_symphony.py +26 -0
  67. scaler/io/__init__.py +0 -0
  68. scaler/io/async_binder.py +89 -0
  69. scaler/io/async_connector.py +95 -0
  70. scaler/io/async_object_storage_connector.py +225 -0
  71. scaler/io/mixins.py +154 -0
  72. scaler/io/sync_connector.py +68 -0
  73. scaler/io/sync_object_storage_connector.py +249 -0
  74. scaler/io/sync_subscriber.py +83 -0
  75. scaler/io/utility.py +80 -0
  76. scaler/io/ymq/__init__.py +0 -0
  77. scaler/io/ymq/_ymq.pyi +95 -0
  78. scaler/io/ymq/_ymq.so +0 -0
  79. scaler/io/ymq/ymq.py +138 -0
  80. scaler/io/ymq_async_object_storage_connector.py +184 -0
  81. scaler/io/ymq_sync_object_storage_connector.py +184 -0
  82. scaler/object_storage/__init__.py +0 -0
  83. scaler/object_storage/object_storage_server.so +0 -0
  84. scaler/protocol/__init__.py +0 -0
  85. scaler/protocol/capnp/__init__.py +0 -0
  86. scaler/protocol/capnp/_python.py +6 -0
  87. scaler/protocol/capnp/common.capnp +68 -0
  88. scaler/protocol/capnp/message.capnp +218 -0
  89. scaler/protocol/capnp/object_storage.capnp +57 -0
  90. scaler/protocol/capnp/status.capnp +73 -0
  91. scaler/protocol/introduction.md +105 -0
  92. scaler/protocol/python/__init__.py +0 -0
  93. scaler/protocol/python/common.py +140 -0
  94. scaler/protocol/python/message.py +751 -0
  95. scaler/protocol/python/mixins.py +13 -0
  96. scaler/protocol/python/object_storage.py +118 -0
  97. scaler/protocol/python/status.py +279 -0
  98. scaler/protocol/worker.md +228 -0
  99. scaler/scheduler/__init__.py +0 -0
  100. scaler/scheduler/allocate_policy/__init__.py +0 -0
  101. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  102. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  103. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  104. scaler/scheduler/allocate_policy/mixins.py +55 -0
  105. scaler/scheduler/controllers/__init__.py +0 -0
  106. scaler/scheduler/controllers/balance_controller.py +65 -0
  107. scaler/scheduler/controllers/client_controller.py +131 -0
  108. scaler/scheduler/controllers/config_controller.py +31 -0
  109. scaler/scheduler/controllers/graph_controller.py +424 -0
  110. scaler/scheduler/controllers/information_controller.py +81 -0
  111. scaler/scheduler/controllers/mixins.py +194 -0
  112. scaler/scheduler/controllers/object_controller.py +147 -0
  113. scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
  114. scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
  115. scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
  116. scaler/scheduler/controllers/scaling_policies/null.py +14 -0
  117. scaler/scheduler/controllers/scaling_policies/types.py +9 -0
  118. scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
  119. scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
  120. scaler/scheduler/controllers/task_controller.py +376 -0
  121. scaler/scheduler/controllers/worker_controller.py +169 -0
  122. scaler/scheduler/object_usage/__init__.py +0 -0
  123. scaler/scheduler/object_usage/object_tracker.py +131 -0
  124. scaler/scheduler/scheduler.py +251 -0
  125. scaler/scheduler/task/__init__.py +0 -0
  126. scaler/scheduler/task/task_state_machine.py +92 -0
  127. scaler/scheduler/task/task_state_manager.py +61 -0
  128. scaler/ui/__init__.py +0 -0
  129. scaler/ui/common/__init__.py +0 -0
  130. scaler/ui/common/constants.py +9 -0
  131. scaler/ui/common/live_display.py +147 -0
  132. scaler/ui/common/memory_window.py +146 -0
  133. scaler/ui/common/setting_page.py +40 -0
  134. scaler/ui/common/task_graph.py +840 -0
  135. scaler/ui/common/task_log.py +111 -0
  136. scaler/ui/common/utility.py +66 -0
  137. scaler/ui/common/webui.py +80 -0
  138. scaler/ui/common/worker_processors.py +104 -0
  139. scaler/ui/v1.py +76 -0
  140. scaler/ui/v2.py +102 -0
  141. scaler/ui/webui.py +21 -0
  142. scaler/utility/__init__.py +0 -0
  143. scaler/utility/debug.py +19 -0
  144. scaler/utility/event_list.py +63 -0
  145. scaler/utility/event_loop.py +58 -0
  146. scaler/utility/exceptions.py +42 -0
  147. scaler/utility/formatter.py +44 -0
  148. scaler/utility/graph/__init__.py +0 -0
  149. scaler/utility/graph/optimization.py +27 -0
  150. scaler/utility/graph/topological_sorter.py +11 -0
  151. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  152. scaler/utility/identifiers.py +107 -0
  153. scaler/utility/logging/__init__.py +0 -0
  154. scaler/utility/logging/decorators.py +25 -0
  155. scaler/utility/logging/scoped_logger.py +33 -0
  156. scaler/utility/logging/utility.py +183 -0
  157. scaler/utility/many_to_many_dict.py +123 -0
  158. scaler/utility/metadata/__init__.py +0 -0
  159. scaler/utility/metadata/profile_result.py +31 -0
  160. scaler/utility/metadata/task_flags.py +30 -0
  161. scaler/utility/mixins.py +13 -0
  162. scaler/utility/network_util.py +7 -0
  163. scaler/utility/one_to_many_dict.py +72 -0
  164. scaler/utility/queues/__init__.py +0 -0
  165. scaler/utility/queues/async_indexed_queue.py +37 -0
  166. scaler/utility/queues/async_priority_queue.py +70 -0
  167. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  168. scaler/utility/queues/indexed_queue.py +114 -0
  169. scaler/utility/serialization.py +9 -0
  170. scaler/version.txt +1 -0
  171. scaler/worker/__init__.py +0 -0
  172. scaler/worker/agent/__init__.py +0 -0
  173. scaler/worker/agent/heartbeat_manager.py +110 -0
  174. scaler/worker/agent/mixins.py +137 -0
  175. scaler/worker/agent/processor/__init__.py +0 -0
  176. scaler/worker/agent/processor/object_cache.py +107 -0
  177. scaler/worker/agent/processor/processor.py +285 -0
  178. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  179. scaler/worker/agent/processor_holder.py +147 -0
  180. scaler/worker/agent/processor_manager.py +369 -0
  181. scaler/worker/agent/profiling_manager.py +109 -0
  182. scaler/worker/agent/task_manager.py +150 -0
  183. scaler/worker/agent/timeout_manager.py +19 -0
  184. scaler/worker/preload.py +84 -0
  185. scaler/worker/worker.py +265 -0
  186. scaler/worker_adapter/__init__.py +0 -0
  187. scaler/worker_adapter/common.py +26 -0
  188. scaler/worker_adapter/ecs.py +241 -0
  189. scaler/worker_adapter/native.py +138 -0
  190. scaler/worker_adapter/symphony/__init__.py +0 -0
  191. scaler/worker_adapter/symphony/callback.py +45 -0
  192. scaler/worker_adapter/symphony/heartbeat_manager.py +82 -0
  193. scaler/worker_adapter/symphony/message.py +24 -0
  194. scaler/worker_adapter/symphony/task_manager.py +289 -0
  195. scaler/worker_adapter/symphony/worker.py +204 -0
  196. scaler/worker_adapter/symphony/worker_adapter.py +123 -0
@@ -0,0 +1,225 @@
1
+ import asyncio
2
+ import logging
3
+ import socket
4
+ import struct
5
+ import uuid
6
+ from typing import Dict, Optional, Tuple
7
+
8
+ from scaler.io.mixins import AsyncObjectStorageConnector
9
+ from scaler.protocol.capnp._python import _object_storage # noqa
10
+ from scaler.protocol.python.object_storage import ObjectRequestHeader, ObjectResponseHeader, to_capnp_object_id
11
+ from scaler.utility.exceptions import ObjectStorageException
12
+ from scaler.utility.identifiers import ObjectID
13
+
14
+
15
class PyAsyncObjectStorageConnector(AsyncObjectStorageConnector):
    """An asyncio connector that uses a raw TCP socket to connect to a Scaler's object storage instance.

    Every message exchanged on the socket is framed as an 8-byte little-endian unsigned length followed by that many
    bytes (see ``__read_framed_message`` / ``__write_framed``).
    """

    def __init__(self):
        self._host: Optional[str] = None
        self._port: Optional[int] = None

        # Set once connect() has completed its handshake; never cleared by this class.
        self._connected_event = asyncio.Event()

        self._reader: Optional[asyncio.StreamReader] = None
        self._writer: Optional[asyncio.StreamWriter] = None

        self._next_request_id = 0
        # One future per object ID with an outstanding get request; resolved by routine().
        self._pending_get_requests: Dict[ObjectID, asyncio.Future] = {}

        self._identity: bytes = (
            f"{self.__class__.__name__}|{socket.gethostname().split('.')[0]}|{uuid.uuid4()}".encode()
        )

    def __del__(self):
        if not self.is_connected():
            return

        self._writer.close()

    async def connect(self, host: str, port: int):
        """Open the TCP connection to ``host:port`` and perform the initial handshake.

        The handshake reads (and discards) one framed message from the server, then sends this connector's identity.

        Raises:
            ObjectStorageException: if the connector is already connected, or if the connection is reset while the
                identity is being flushed.
        """

        # Check first, so that a rejected call cannot overwrite the address of the live connection.
        if self.is_connected():
            raise ObjectStorageException("connector is already connected.")

        self._host = host
        self._port = port

        self._reader, self._writer = await asyncio.open_connection(self._host, self._port)
        await self.__read_framed_message()
        self.__write_framed(self._identity)

        try:
            await self._writer.drain()
        except ConnectionResetError:
            self.__raise_connection_failure()

        # Makes sure the socket is TCP_NODELAY. It seems to be the case by default, but that's not specified in
        # asyncio's documentation and might change in the future.
        self._writer.get_extra_info("socket").setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

        self._connected_event.set()

    async def wait_until_connected(self):
        """Block until connect() has completed."""
        await self._connected_event.wait()

    def is_connected(self) -> bool:
        """Return True once connect() has completed."""
        return self._connected_event.is_set()

    async def destroy(self):
        """Close the connection (if one was established) and wait for the writer to finish closing."""
        if not self.is_connected():
            return

        # is_closing is a method and must be called: the bare attribute is always truthy, which would skip close()
        # and leave wait_closed() below waiting on a writer nobody closed.
        if not self._writer.is_closing():
            self._writer.close()

        await self._writer.wait_closed()

    @property
    def reader(self) -> Optional[asyncio.StreamReader]:
        return self._reader

    @property
    def writer(self) -> Optional[asyncio.StreamWriter]:
        return self._writer

    @property
    def address(self) -> str:
        """The ``tcp://host:port`` address of the server.

        Raises:
            ObjectStorageException: if the connector has no writer yet, or the writer is closing.
        """
        self.__ensure_is_connected()
        return f"tcp://{self._host}:{self._port}"

    async def routine(self):
        """Receive one response from the server and resolve the matching pending get request.

        Responses whose type is not ``GetOK`` are ignored; a ``GetOK`` for an object ID that was never requested is
        logged and dropped.
        """
        await self.wait_until_connected()

        response = await self.__receive_response()
        if response is None:
            return

        header, payload = response

        if header.response_type != ObjectResponseHeader.ObjectResponseType.GetOK:
            return

        pending_get_future = self._pending_get_requests.pop(header.object_id, None)

        if pending_get_future is None:
            logging.warning(f"unknown get-ok response for unrequested object_id={repr(header.object_id)}.")
            return

        pending_get_future.set_result(payload)

    async def set_object(self, object_id: ObjectID, payload: bytes) -> None:
        """Send a SetObject request carrying ``payload``. Does not wait for the server's response."""
        await self.__send_request(object_id, len(payload), ObjectRequestHeader.ObjectRequestType.SetObject, payload)

    async def get_object(self, object_id: ObjectID, max_payload_length: int = 2**64 - 1) -> bytes:
        """Request the object's payload and wait until routine() delivers it.

        If a get request for the same ``object_id`` is already pending, no new request is sent and the existing
        future is awaited instead.
        """
        pending_get_future = self._pending_get_requests.get(object_id)

        if pending_get_future is None:
            pending_get_future = asyncio.Future()
            self._pending_get_requests[object_id] = pending_get_future

            await self.__send_request(
                object_id, max_payload_length, ObjectRequestHeader.ObjectRequestType.GetObject, None
            )

        return await pending_get_future

    async def delete_object(self, object_id: ObjectID) -> None:
        """Send a DeleteObject request. Does not wait for the server's response."""
        await self.__send_request(object_id, 0, ObjectRequestHeader.ObjectRequestType.DeleteObject, None)

    async def duplicate_object_id(self, object_id: ObjectID, new_object_id: ObjectID) -> None:
        """Send a DuplicateObjectID request for ``new_object_id``, with the serialized ``object_id`` as payload."""
        object_id_payload = to_capnp_object_id(object_id).to_bytes()

        await self.__send_request(
            new_object_id,
            len(object_id_payload),
            ObjectRequestHeader.ObjectRequestType.DuplicateObjectID,
            object_id_payload,
        )

    def __ensure_is_connected(self):
        """Raise ObjectStorageException if there is no writer or the writer is closing."""
        if self._writer is None:
            raise ObjectStorageException("connector is not connected.")

        if self._writer.is_closing():
            raise ObjectStorageException("connection is closed.")

    async def __send_request(
        self,
        object_id: ObjectID,
        payload_length: int,
        request_type: ObjectRequestHeader.ObjectRequestType,
        payload: Optional[bytes],
    ):
        """Write a framed request header, and the framed payload if there is one, then flush the writer.

        Raises:
            ObjectStorageException: if the connector is not connected, or the connection is reset while flushing.
        """
        self.__ensure_is_connected()
        assert self._writer is not None

        request_id = self._next_request_id
        self._next_request_id += 1
        self._next_request_id %= 2**64 - 1  # UINT64_MAX

        header = ObjectRequestHeader.new_msg(object_id, payload_length, request_id, request_type)

        self.__write_request_header(header)

        if payload is not None:
            self.__write_request_payload(payload)

        try:
            await self._writer.drain()
        except ConnectionResetError:
            self.__raise_connection_failure()

    def __write_request_header(self, header: ObjectRequestHeader):
        assert self._writer is not None
        self.__write_framed(header.get_message().to_bytes())

    def __write_request_payload(self, payload: bytes):
        assert self._writer is not None
        self.__write_framed(payload)

    async def __receive_response(self) -> Optional[Tuple[ObjectResponseHeader, bytes]]:
        """Read one response header and its payload.

        Returns None when the writer is already closing.

        Raises:
            ObjectStorageException: if the stream ends before a complete response has been read.
        """
        assert self._reader is not None

        if self._writer.is_closing():
            return None

        try:
            header = await self.__read_response_header()
            payload = await self.__read_response_payload(header)
        except asyncio.IncompleteReadError:
            self.__raise_connection_failure()

        return header, payload

    async def __read_response_header(self) -> ObjectResponseHeader:
        assert self._reader is not None

        header_data = await self.__read_framed_message()
        assert len(header_data) == ObjectResponseHeader.MESSAGE_LENGTH

        with _object_storage.ObjectResponseHeader.from_bytes(header_data) as header_message:
            return ObjectResponseHeader(header_message)

    async def __read_response_payload(self, header: ObjectResponseHeader) -> bytes:
        """Read the framed payload announced by ``header``; a zero payload length means no frame follows."""
        assert self._reader is not None

        if header.payload_length > 0:
            res = await self.__read_framed_message()
            assert len(res) == header.payload_length
            return res
        else:
            return b""

    async def __read_framed_message(self) -> bytes:
        """Read an 8-byte little-endian length prefix, then exactly that many bytes."""
        length_bytes = await self._reader.readexactly(8)
        (payload_length,) = struct.unpack("<Q", length_bytes)
        return await self._reader.readexactly(payload_length) if payload_length > 0 else bytes()

    def __write_framed(self, payload: bytes):
        """Queue ``payload`` on the writer, preceded by its 8-byte little-endian length. Does not flush."""
        self._writer.write(struct.pack("<Q", len(payload)))
        self._writer.write(payload)

    @staticmethod
    def __raise_connection_failure():
        raise ObjectStorageException("connection failure to object storage server.")
scaler/io/mixins.py ADDED
@@ -0,0 +1,154 @@
1
+ import abc
2
+ from typing import Awaitable, Callable, Optional
3
+
4
+ from scaler.protocol.python.mixins import Message
5
+ from scaler.protocol.python.status import BinderStatus
6
+ from scaler.utility.identifiers import ObjectID
7
+ from scaler.utility.mixins import Looper, Reporter
8
+
9
+
10
class AsyncBinder(Looper, Reporter, metaclass=abc.ABCMeta):
    """Abstract interface of an asynchronous binder.

    Combines the ``Looper`` and ``Reporter`` mixins; concrete subclasses must implement every member below.
    """

    @property
    @abc.abstractmethod
    def identity(self):
        """Identity of this binder (the interface does not declare its type)."""
        raise NotImplementedError

    @abc.abstractmethod
    def destroy(self):
        """Tear the binder down; the exact cleanup is implementation-defined."""
        raise NotImplementedError

    @abc.abstractmethod
    def register(self, callback: Callable[[bytes, Message], Awaitable[None]]):
        """Register ``callback``, an awaitable-returning callable taking ``(bytes, Message)``."""
        raise NotImplementedError

    @abc.abstractmethod
    async def send(self, to: bytes, message: Message):
        """Send ``message`` to the destination ``to``."""
        raise NotImplementedError

    @abc.abstractmethod
    def get_status(self) -> BinderStatus:
        """Return the binder's status as a ``BinderStatus``."""
        raise NotImplementedError
31
+
32
+
33
class AsyncConnector(Looper, metaclass=abc.ABCMeta):
    """Abstract interface of an asynchronous, message-based connector (also a ``Looper``)."""

    @abc.abstractmethod
    def destroy(self):
        """Tear the connector down; the exact cleanup is implementation-defined."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def identity(self) -> bytes:
        """Identity of this connector, as bytes."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def address(self) -> str:
        """Address of this connector, as a string."""
        raise NotImplementedError

    @abc.abstractmethod
    async def send(self, message: Message):
        """Send ``message``."""
        raise NotImplementedError

    @abc.abstractmethod
    async def receive(self) -> Optional[Message]:
        """Receive one message; the signature allows ``None`` when no message is produced."""
        raise NotImplementedError
55
+
56
+
57
class SyncConnector(metaclass=abc.ABCMeta):
    """Abstract interface of a synchronous, message-based connector."""

    @abc.abstractmethod
    def destroy(self):
        """Tear the connector down; the exact cleanup is implementation-defined."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def identity(self) -> bytes:
        """Identity of this connector, as bytes."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def address(self) -> str:
        """Address of this connector, as a string."""
        raise NotImplementedError

    @abc.abstractmethod
    def send(self, message: Message):
        """Send ``message``."""
        raise NotImplementedError

    @abc.abstractmethod
    def receive(self) -> Optional[Message]:
        """Receive one message; the signature allows ``None`` when no message is produced."""
        raise NotImplementedError
79
+
80
+
81
class AsyncObjectStorageConnector(Looper, metaclass=abc.ABCMeta):
    """Abstract interface of an asynchronous connector to an object storage server (also a ``Looper``)."""

    @abc.abstractmethod
    async def connect(self, host: str, port: int):
        """Connect to the server at ``host``/``port``."""
        raise NotImplementedError

    @abc.abstractmethod
    async def wait_until_connected(self):
        """Wait until the connector is connected."""
        raise NotImplementedError

    @abc.abstractmethod
    def is_connected(self) -> bool:
        """Tell whether the connector is connected."""
        raise NotImplementedError

    @abc.abstractmethod
    async def destroy(self):
        """Tear the connector down; the exact cleanup is implementation-defined."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def address(self) -> str:
        """Address of the server, as a string."""
        raise NotImplementedError

    @abc.abstractmethod
    async def set_object(self, object_id: ObjectID, payload: bytes) -> None:
        """Store ``payload`` under ``object_id``."""
        raise NotImplementedError

    @abc.abstractmethod
    async def get_object(self, object_id: ObjectID, max_payload_length: int = 2**64 - 1) -> bytes:
        """Return the payload stored under ``object_id``; ``max_payload_length`` defaults to 2**64 - 1."""
        raise NotImplementedError

    @abc.abstractmethod
    async def delete_object(self, object_id: ObjectID) -> None:
        """Delete the object stored under ``object_id``."""
        raise NotImplementedError

    @abc.abstractmethod
    async def duplicate_object_id(self, object_id: ObjectID, new_object_id: ObjectID) -> None:
        """Duplicate ``object_id`` as ``new_object_id``."""
        raise NotImplementedError
118
+
119
+
120
class SyncObjectStorageConnector(metaclass=abc.ABCMeta):
    """Abstract interface of a synchronous connector to an object storage server."""

    @abc.abstractmethod
    def destroy(self):
        """Tear the connector down; the exact cleanup is implementation-defined."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def address(self) -> str:
        """Address of the server, as a string."""
        raise NotImplementedError

    @abc.abstractmethod
    def set_object(self, object_id: ObjectID, payload: bytes):
        """Store ``payload`` under ``object_id``."""
        raise NotImplementedError

    @abc.abstractmethod
    def get_object(self, object_id: ObjectID, max_payload_length: int = 2**64 - 1) -> bytearray:
        """Return the payload stored under ``object_id``; ``max_payload_length`` defaults to 2**64 - 1."""
        raise NotImplementedError

    @abc.abstractmethod
    def delete_object(self, object_id: ObjectID) -> bool:
        """Delete the object stored under ``object_id`` and report the outcome as a boolean."""
        raise NotImplementedError

    @abc.abstractmethod
    def duplicate_object_id(self, object_id: ObjectID, new_object_id: ObjectID) -> None:
        """Duplicate ``object_id`` as ``new_object_id``."""
        raise NotImplementedError
145
+
146
+
147
class SyncSubscriber(metaclass=abc.ABCMeta):
    """Abstract interface of a synchronous subscriber."""

    @abc.abstractmethod
    def destroy(self):
        """Tear the subscriber down; the exact cleanup is implementation-defined."""
        raise NotImplementedError

    @abc.abstractmethod
    def run(self) -> None:
        """Run the subscriber."""
        raise NotImplementedError
@@ -0,0 +1,68 @@
1
+ import logging
2
+ import os
3
+ import socket
4
+ import threading
5
+ import uuid
6
+ from typing import Optional
7
+
8
+ import zmq
9
+
10
+ from scaler.config.types.zmq import ZMQConfig
11
+ from scaler.io.mixins import SyncConnector
12
+ from scaler.io.utility import deserialize, serialize
13
+ from scaler.protocol.python.mixins import Message
14
+
15
+
16
class ZMQSyncConnector(SyncConnector):
    """Synchronous connector that exchanges serialized messages over a single ZMQ socket.

    One lock guards the socket: it is held for the duration of each ``send`` and each ``recv`` call.
    """

    def __init__(self, context: zmq.Context, socket_type: int, address: ZMQConfig, identity: Optional[bytes]):
        self._address = address

        self._context = context
        self._socket = self._context.socket(socket_type)

        # Fall back to a generated "<pid>|<short hostname>|<uuid4>" identity when the caller supplies none.
        if identity is None:
            self._identity: bytes = f"{os.getpid()}|{socket.gethostname().split('.')[0]}|{uuid.uuid4()}".encode()
        else:
            self._identity = identity

        # Socket options are applied before connecting: the identity, then send/receive high-water marks of 0.
        for option, value in ((zmq.IDENTITY, self._identity), (zmq.SNDHWM, 0), (zmq.RCVHWM, 0)):
            self._socket.setsockopt(option, value)

        self._socket.connect(self._address.to_address())

        self._lock = threading.Lock()

    def destroy(self):
        """Close the underlying socket."""
        self._socket.close()

    @property
    def address(self) -> str:
        return self._address.to_address()

    @property
    def identity(self) -> bytes:
        return self._identity

    def send(self, message: Message):
        """Serialize ``message`` and send it on the socket while holding the lock."""
        serialized = serialize(message)
        with self._lock:
            self._socket.send(serialized, copy=False)

    def receive(self) -> Optional[Message]:
        """Receive one frame while holding the lock, then deserialize it outside the lock.

        Returns None when the payload cannot be deserialized into a message.
        """
        with self._lock:
            frame = self._socket.recv(copy=False)

        return self.__compose_message(frame.bytes)

    def __compose_message(self, payload: bytes) -> Optional[Message]:
        """Deserialize ``payload``; log an error and return None when ``deserialize`` yields None."""
        message: Optional[Message] = deserialize(payload)
        if message is not None:
            return message

        logging.error(f"{self.__get_prefix()}: received unknown message: {payload!r}")
        return None

    def __get_prefix(self):
        return f"{self.__class__.__name__}[{self._identity.decode()}]:"
@@ -0,0 +1,249 @@
1
+ import collections
2
+ import socket
3
+ import struct
4
+ import uuid
5
+ from threading import Lock
6
+ from typing import Iterable, List, Optional, Tuple
7
+
8
+ from scaler.io.mixins import SyncObjectStorageConnector
9
+ from scaler.protocol.capnp._python import _object_storage # noqa
10
+ from scaler.protocol.python.object_storage import ObjectRequestHeader, ObjectResponseHeader, to_capnp_object_id
11
+ from scaler.utility.exceptions import ObjectStorageException
12
+ from scaler.utility.identifiers import ObjectID
13
+
14
+ # Some OSes raise an OSError when sending buffers too large with send() or sendmsg().
15
+ MAX_CHUNK_SIZE = 128 * 1024 * 1024
16
+
17
+
18
class PySyncObjectStorageConnector(SyncObjectStorageConnector):
    """A synchronous connector that uses a raw TCP socket to connect to a Scaler's object storage instance.

    Every message exchanged on the socket is framed as an 8-byte little-endian unsigned length followed by that many
    bytes. Each public request method holds the socket lock while it sends the request and reads the response.
    """

    def __init__(self, host: str, port: int):
        # Assigned before anything that can raise: __del__ calls destroy(), which reads both attributes, and
        # __del__ still runs on an instance whose __init__ failed (e.g. connection refused).
        self._socket_lock = Lock()
        self._socket: Optional[socket.socket] = None

        self._host = host
        self._port = port

        self._identity: bytes = (
            f"{self.__class__.__name__}|{socket.gethostname().split('.')[0]}|{uuid.uuid4()}".encode()
        )

        self._next_request_id = 0

        self._socket = socket.create_connection((self._host, self._port))

        try:
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

            self.__send_buffers([struct.pack("<Q", len(self._identity)), self._identity])
            self.__read_framed_message()  # receive server identity
        except Exception:
            # Do not leave the socket open until garbage collection when the handshake fails.
            self.destroy()
            raise

    def __del__(self):
        self.destroy()

    def destroy(self):
        """Close the socket if it is still open. Safe to call more than once."""
        with self._socket_lock:
            if self._socket is not None:
                self._socket.close()
                self._socket = None

    @property
    def address(self) -> str:
        return f"tcp://{self._host}:{self._port}"

    def set_object(self, object_id: ObjectID, payload: bytes):
        """
        Sets the object's payload on the object storage server.
        """

        with self._socket_lock:
            self.__send_request(object_id, len(payload), ObjectRequestHeader.ObjectRequestType.SetObject, payload)
            response_header, response_payload = self.__receive_response()

        self.__ensure_response_type(response_header, [ObjectResponseHeader.ObjectResponseType.SetOK])
        self.__ensure_empty_payload(response_payload)

    def get_object(self, object_id: ObjectID, max_payload_length: int = 2**64 - 1) -> bytearray:
        """
        Returns the object's payload from the object storage server.

        Will block until the object is available.
        """

        with self._socket_lock:
            self.__send_request(object_id, max_payload_length, ObjectRequestHeader.ObjectRequestType.GetObject)
            response_header, response_payload = self.__receive_response()

        self.__ensure_response_type(response_header, [ObjectResponseHeader.ObjectResponseType.GetOK])

        return response_payload

    def delete_object(self, object_id: ObjectID) -> bool:
        """
        Removes the object from the object storage server.

        Returns `False` if the object wasn't found in the server. Otherwise returns `True`.
        """

        with self._socket_lock:
            self.__send_request(object_id, 0, ObjectRequestHeader.ObjectRequestType.DeleteObject)
            response_header, response_payload = self.__receive_response()

        self.__ensure_response_type(
            response_header,
            [ObjectResponseHeader.ObjectResponseType.DelOK, ObjectResponseHeader.ObjectResponseType.DelNotExists],
        )
        self.__ensure_empty_payload(response_payload)

        return response_header.response_type == ObjectResponseHeader.ObjectResponseType.DelOK

    def duplicate_object_id(self, object_id: ObjectID, new_object_id: ObjectID) -> None:
        """
        Link an object's content to a new object ID on the object storage server.
        """

        object_id_payload = to_capnp_object_id(object_id).to_bytes()

        with self._socket_lock:
            self.__send_request(
                new_object_id,
                len(object_id_payload),
                ObjectRequestHeader.ObjectRequestType.DuplicateObjectID,
                object_id_payload,
            )
            response_header, response_payload = self.__receive_response()

        self.__ensure_response_type(response_header, [ObjectResponseHeader.ObjectResponseType.DuplicateOK])
        self.__ensure_empty_payload(response_payload)

    def __ensure_is_connected(self):
        """Raise ObjectStorageException if the socket has been closed by destroy()."""
        if self._socket is None:
            raise ObjectStorageException("connector is closed.")

    def __ensure_response_type(
        self, header: ObjectResponseHeader, valid_response_types: Iterable[ObjectResponseHeader.ObjectResponseType]
    ):
        """Raise RuntimeError if the response type is not one of ``valid_response_types``."""
        if header.response_type not in valid_response_types:
            raise RuntimeError(f"unexpected object storage response_type={header.response_type}.")

    def __ensure_empty_payload(self, payload: bytearray):
        """Raise RuntimeError if the response carried a payload where none was expected."""
        if len(payload) != 0:
            raise RuntimeError(f"unexpected response payload_length={len(payload)}, expected 0.")

    def __send_request(
        self,
        object_id: ObjectID,
        payload_length: int,
        request_type: ObjectRequestHeader.ObjectRequestType,
        payload: Optional[bytes] = None,
    ):
        """Send a framed request header, followed by the framed payload if there is one.

        Raises:
            ObjectStorageException: if the connector has been closed, or the socket reports a failed send.
        """
        self.__ensure_is_connected()
        assert self._socket is not None

        request_id = self._next_request_id
        self._next_request_id += 1
        self._next_request_id %= 2**64 - 1  # UINT64_MAX

        header = ObjectRequestHeader.new_msg(object_id, payload_length, request_id, request_type)
        header_bytes = header.get_message().to_bytes()

        if payload is not None:
            self.__send_buffers(
                [struct.pack("<Q", len(header_bytes)), header_bytes, struct.pack("<Q", len(payload)), payload]
            )
        else:
            self.__send_buffers([struct.pack("<Q", len(header_bytes)), header_bytes])

    def __send_buffers(self, buffers: List[bytes]) -> None:
        """Send all ``buffers`` in order, coping with partial sends."""
        if not buffers:
            return

        assert self._socket is not None

        total_size = sum(len(buffer) for buffer in buffers)

        # If the message is small enough, first try to send it at once with sendmsg(). This would ensure the message can
        # be transmitted within a single TCP segment.
        if total_size < MAX_CHUNK_SIZE:
            sent = self._socket.sendmsg(buffers)  # type: ignore[attr-defined]

            if sent <= 0:
                self.__raise_connection_failure()

            # Drop the buffers that were fully sent. The comparison is strict so the queue never becomes empty: a
            # buffer that was sent exactly in full is kept and truncated to an empty view just below.
            remaining_buffers = collections.deque(buffers)
            while sent > len(remaining_buffers[0]):
                removed_buffer = remaining_buffers.popleft()
                sent -= len(removed_buffer)

            if sent > 0:
                # Truncate the first partially sent buffer
                remaining_buffers[0] = memoryview(remaining_buffers[0])[sent:]

            buffers = list(remaining_buffers)

        # Send the remaining buffers sequentially
        for buffer in buffers:
            self.__send_buffer(buffer)

    def __send_buffer(self, buffer: bytes) -> None:
        """Send ``buffer`` completely, in slices of at most MAX_CHUNK_SIZE bytes per send() call."""
        buffer_view = memoryview(buffer)

        total_sent = 0
        while total_sent < len(buffer):
            sent = self._socket.send(buffer_view[total_sent : MAX_CHUNK_SIZE + total_sent])

            if sent <= 0:
                self.__raise_connection_failure()

            total_sent += sent

    def __receive_response(self) -> Tuple[ObjectResponseHeader, bytearray]:
        """Read one response header and its payload from the socket."""
        assert self._socket is not None

        header = self.__read_response_header()
        payload = self.__read_response_payload(header)

        return header, payload

    def __read_response_header(self) -> ObjectResponseHeader:
        assert self._socket is not None

        header_bytearray = self.__read_framed_message()

        # pycapnp does not like to read from a bytearray object. This looks like a not-yet-resolved issue.
        # That is annoying because it leads to an unnecessary copy of the header's buffer.
        # See https://github.com/capnproto/pycapnp/issues/153
        header_bytes = bytes(header_bytearray)

        with _object_storage.ObjectResponseHeader.from_bytes(header_bytes) as header_message:
            return ObjectResponseHeader(header_message)

    def __read_response_payload(self, header: ObjectResponseHeader) -> bytearray:
        """Read the framed payload announced by ``header``; a zero payload length means no frame follows."""
        if header.payload_length > 0:
            res = self.__read_framed_message()
            assert len(res) == header.payload_length
            return res
        else:
            return bytearray()

    def __read_exactly(self, length: int) -> bytearray:
        """Read exactly ``length`` bytes, receiving at most MAX_CHUNK_SIZE bytes per recv_into() call.

        Raises:
            ObjectStorageException: if the socket returns no data before ``length`` bytes have been read.
        """
        buffer = bytearray(length)

        total_received = 0
        while total_received < length:
            chunk_size = min(MAX_CHUNK_SIZE, length - total_received)
            received = self._socket.recv_into(memoryview(buffer)[total_received:], chunk_size)

            if received <= 0:
                self.__raise_connection_failure()

            total_received += received

        return buffer

    def __read_framed_message(self) -> bytearray:
        """Read an 8-byte little-endian length prefix, then exactly that many bytes."""
        length_bytes = self.__read_exactly(8)
        (payload_length,) = struct.unpack("<Q", length_bytes)
        return self.__read_exactly(payload_length) if payload_length > 0 else bytearray()

    @staticmethod
    def __raise_connection_failure():
        raise ObjectStorageException("connection failure to object storage server.")