opengris-scaler 1.12.7__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of opengris-scaler might be problematic. Click here for more details.

Files changed (232) hide show
  1. opengris_scaler-1.12.7.dist-info/METADATA +729 -0
  2. opengris_scaler-1.12.7.dist-info/RECORD +232 -0
  3. opengris_scaler-1.12.7.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.7.dist-info/entry_points.txt +9 -0
  5. opengris_scaler-1.12.7.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.7.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.7.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-b787335c.1.0.so +0 -0
  9. opengris_scaler.libs/libkj-1-094aa318.1.0.so +0 -0
  10. scaler/CMakeLists.txt +11 -0
  11. scaler/__init__.py +14 -0
  12. scaler/about.py +5 -0
  13. scaler/client/__init__.py +0 -0
  14. scaler/client/agent/__init__.py +0 -0
  15. scaler/client/agent/client_agent.py +210 -0
  16. scaler/client/agent/disconnect_manager.py +27 -0
  17. scaler/client/agent/future_manager.py +112 -0
  18. scaler/client/agent/heartbeat_manager.py +74 -0
  19. scaler/client/agent/mixins.py +89 -0
  20. scaler/client/agent/object_manager.py +98 -0
  21. scaler/client/agent/task_manager.py +64 -0
  22. scaler/client/client.py +635 -0
  23. scaler/client/future.py +252 -0
  24. scaler/client/object_buffer.py +129 -0
  25. scaler/client/object_reference.py +25 -0
  26. scaler/client/serializer/__init__.py +0 -0
  27. scaler/client/serializer/default.py +16 -0
  28. scaler/client/serializer/mixins.py +38 -0
  29. scaler/cluster/__init__.py +0 -0
  30. scaler/cluster/cluster.py +115 -0
  31. scaler/cluster/combo.py +148 -0
  32. scaler/cluster/object_storage_server.py +45 -0
  33. scaler/cluster/scheduler.py +83 -0
  34. scaler/config/__init__.py +0 -0
  35. scaler/config/defaults.py +87 -0
  36. scaler/config/loader.py +95 -0
  37. scaler/config/mixins.py +15 -0
  38. scaler/config/section/__init__.py +0 -0
  39. scaler/config/section/cluster.py +56 -0
  40. scaler/config/section/native_worker_adapter.py +44 -0
  41. scaler/config/section/object_storage_server.py +7 -0
  42. scaler/config/section/scheduler.py +53 -0
  43. scaler/config/section/symphony_worker_adapter.py +47 -0
  44. scaler/config/section/top.py +13 -0
  45. scaler/config/section/webui.py +16 -0
  46. scaler/config/types/__init__.py +0 -0
  47. scaler/config/types/object_storage_server.py +45 -0
  48. scaler/config/types/worker.py +57 -0
  49. scaler/config/types/zmq.py +79 -0
  50. scaler/entry_points/__init__.py +0 -0
  51. scaler/entry_points/cluster.py +133 -0
  52. scaler/entry_points/object_storage_server.py +41 -0
  53. scaler/entry_points/scheduler.py +135 -0
  54. scaler/entry_points/top.py +286 -0
  55. scaler/entry_points/webui.py +26 -0
  56. scaler/entry_points/worker_adapter_native.py +137 -0
  57. scaler/entry_points/worker_adapter_symphony.py +102 -0
  58. scaler/io/__init__.py +0 -0
  59. scaler/io/async_binder.py +85 -0
  60. scaler/io/async_connector.py +95 -0
  61. scaler/io/async_object_storage_connector.py +185 -0
  62. scaler/io/mixins.py +154 -0
  63. scaler/io/sync_connector.py +68 -0
  64. scaler/io/sync_object_storage_connector.py +185 -0
  65. scaler/io/sync_subscriber.py +83 -0
  66. scaler/io/utility.py +31 -0
  67. scaler/io/ymq/CMakeLists.txt +98 -0
  68. scaler/io/ymq/__init__.py +0 -0
  69. scaler/io/ymq/_ymq.pyi +96 -0
  70. scaler/io/ymq/_ymq.so +0 -0
  71. scaler/io/ymq/bytes.h +114 -0
  72. scaler/io/ymq/common.h +29 -0
  73. scaler/io/ymq/configuration.h +60 -0
  74. scaler/io/ymq/epoll_context.cpp +185 -0
  75. scaler/io/ymq/epoll_context.h +85 -0
  76. scaler/io/ymq/error.h +132 -0
  77. scaler/io/ymq/event_loop.h +55 -0
  78. scaler/io/ymq/event_loop_thread.cpp +64 -0
  79. scaler/io/ymq/event_loop_thread.h +46 -0
  80. scaler/io/ymq/event_manager.h +81 -0
  81. scaler/io/ymq/file_descriptor.h +203 -0
  82. scaler/io/ymq/interruptive_concurrent_queue.h +169 -0
  83. scaler/io/ymq/io_context.cpp +98 -0
  84. scaler/io/ymq/io_context.h +44 -0
  85. scaler/io/ymq/io_socket.cpp +299 -0
  86. scaler/io/ymq/io_socket.h +121 -0
  87. scaler/io/ymq/iocp_context.cpp +102 -0
  88. scaler/io/ymq/iocp_context.h +83 -0
  89. scaler/io/ymq/logging.h +163 -0
  90. scaler/io/ymq/message.h +15 -0
  91. scaler/io/ymq/message_connection.h +16 -0
  92. scaler/io/ymq/message_connection_tcp.cpp +672 -0
  93. scaler/io/ymq/message_connection_tcp.h +96 -0
  94. scaler/io/ymq/network_utils.h +179 -0
  95. scaler/io/ymq/pymod_ymq/bytes.h +113 -0
  96. scaler/io/ymq/pymod_ymq/exception.h +124 -0
  97. scaler/io/ymq/pymod_ymq/gil.h +15 -0
  98. scaler/io/ymq/pymod_ymq/io_context.h +166 -0
  99. scaler/io/ymq/pymod_ymq/io_socket.h +285 -0
  100. scaler/io/ymq/pymod_ymq/message.h +99 -0
  101. scaler/io/ymq/pymod_ymq/python.h +153 -0
  102. scaler/io/ymq/pymod_ymq/ymq.cpp +23 -0
  103. scaler/io/ymq/pymod_ymq/ymq.h +357 -0
  104. scaler/io/ymq/readme.md +114 -0
  105. scaler/io/ymq/simple_interface.cpp +80 -0
  106. scaler/io/ymq/simple_interface.h +24 -0
  107. scaler/io/ymq/tcp_client.cpp +367 -0
  108. scaler/io/ymq/tcp_client.h +75 -0
  109. scaler/io/ymq/tcp_operations.h +41 -0
  110. scaler/io/ymq/tcp_server.cpp +410 -0
  111. scaler/io/ymq/tcp_server.h +79 -0
  112. scaler/io/ymq/third_party/concurrentqueue.h +3747 -0
  113. scaler/io/ymq/timed_queue.h +272 -0
  114. scaler/io/ymq/timestamp.h +102 -0
  115. scaler/io/ymq/typedefs.h +20 -0
  116. scaler/io/ymq/utils.h +34 -0
  117. scaler/io/ymq/ymq.py +130 -0
  118. scaler/object_storage/CMakeLists.txt +50 -0
  119. scaler/object_storage/__init__.py +0 -0
  120. scaler/object_storage/constants.h +11 -0
  121. scaler/object_storage/defs.h +14 -0
  122. scaler/object_storage/io_helper.cpp +44 -0
  123. scaler/object_storage/io_helper.h +9 -0
  124. scaler/object_storage/message.cpp +56 -0
  125. scaler/object_storage/message.h +130 -0
  126. scaler/object_storage/object_manager.cpp +126 -0
  127. scaler/object_storage/object_manager.h +52 -0
  128. scaler/object_storage/object_storage_server.cpp +359 -0
  129. scaler/object_storage/object_storage_server.h +126 -0
  130. scaler/object_storage/object_storage_server.so +0 -0
  131. scaler/object_storage/pymod_object_storage_server.cpp +104 -0
  132. scaler/protocol/__init__.py +0 -0
  133. scaler/protocol/capnp/__init__.py +0 -0
  134. scaler/protocol/capnp/_python.py +6 -0
  135. scaler/protocol/capnp/common.capnp +63 -0
  136. scaler/protocol/capnp/message.capnp +216 -0
  137. scaler/protocol/capnp/object_storage.capnp +52 -0
  138. scaler/protocol/capnp/status.capnp +73 -0
  139. scaler/protocol/introduction.md +105 -0
  140. scaler/protocol/python/__init__.py +0 -0
  141. scaler/protocol/python/common.py +135 -0
  142. scaler/protocol/python/message.py +726 -0
  143. scaler/protocol/python/mixins.py +13 -0
  144. scaler/protocol/python/object_storage.py +118 -0
  145. scaler/protocol/python/status.py +279 -0
  146. scaler/protocol/worker.md +228 -0
  147. scaler/scheduler/__init__.py +0 -0
  148. scaler/scheduler/allocate_policy/__init__.py +0 -0
  149. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  150. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  151. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  152. scaler/scheduler/allocate_policy/mixins.py +55 -0
  153. scaler/scheduler/controllers/__init__.py +0 -0
  154. scaler/scheduler/controllers/balance_controller.py +65 -0
  155. scaler/scheduler/controllers/client_controller.py +131 -0
  156. scaler/scheduler/controllers/config_controller.py +31 -0
  157. scaler/scheduler/controllers/graph_controller.py +424 -0
  158. scaler/scheduler/controllers/information_controller.py +81 -0
  159. scaler/scheduler/controllers/mixins.py +201 -0
  160. scaler/scheduler/controllers/object_controller.py +147 -0
  161. scaler/scheduler/controllers/scaling_controller.py +86 -0
  162. scaler/scheduler/controllers/task_controller.py +373 -0
  163. scaler/scheduler/controllers/worker_controller.py +168 -0
  164. scaler/scheduler/object_usage/__init__.py +0 -0
  165. scaler/scheduler/object_usage/object_tracker.py +131 -0
  166. scaler/scheduler/scheduler.py +253 -0
  167. scaler/scheduler/task/__init__.py +0 -0
  168. scaler/scheduler/task/task_state_machine.py +92 -0
  169. scaler/scheduler/task/task_state_manager.py +61 -0
  170. scaler/ui/__init__.py +0 -0
  171. scaler/ui/constants.py +9 -0
  172. scaler/ui/live_display.py +118 -0
  173. scaler/ui/memory_window.py +146 -0
  174. scaler/ui/setting_page.py +47 -0
  175. scaler/ui/task_graph.py +370 -0
  176. scaler/ui/task_log.py +83 -0
  177. scaler/ui/utility.py +35 -0
  178. scaler/ui/webui.py +125 -0
  179. scaler/ui/worker_processors.py +85 -0
  180. scaler/utility/__init__.py +0 -0
  181. scaler/utility/debug.py +19 -0
  182. scaler/utility/event_list.py +63 -0
  183. scaler/utility/event_loop.py +58 -0
  184. scaler/utility/exceptions.py +42 -0
  185. scaler/utility/formatter.py +44 -0
  186. scaler/utility/graph/__init__.py +0 -0
  187. scaler/utility/graph/optimization.py +27 -0
  188. scaler/utility/graph/topological_sorter.py +11 -0
  189. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  190. scaler/utility/identifiers.py +105 -0
  191. scaler/utility/logging/__init__.py +0 -0
  192. scaler/utility/logging/decorators.py +25 -0
  193. scaler/utility/logging/scoped_logger.py +33 -0
  194. scaler/utility/logging/utility.py +183 -0
  195. scaler/utility/many_to_many_dict.py +123 -0
  196. scaler/utility/metadata/__init__.py +0 -0
  197. scaler/utility/metadata/profile_result.py +31 -0
  198. scaler/utility/metadata/task_flags.py +30 -0
  199. scaler/utility/mixins.py +13 -0
  200. scaler/utility/network_util.py +7 -0
  201. scaler/utility/one_to_many_dict.py +72 -0
  202. scaler/utility/queues/__init__.py +0 -0
  203. scaler/utility/queues/async_indexed_queue.py +37 -0
  204. scaler/utility/queues/async_priority_queue.py +70 -0
  205. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  206. scaler/utility/queues/indexed_queue.py +114 -0
  207. scaler/utility/serialization.py +9 -0
  208. scaler/version.txt +1 -0
  209. scaler/worker/__init__.py +0 -0
  210. scaler/worker/agent/__init__.py +0 -0
  211. scaler/worker/agent/heartbeat_manager.py +107 -0
  212. scaler/worker/agent/mixins.py +137 -0
  213. scaler/worker/agent/processor/__init__.py +0 -0
  214. scaler/worker/agent/processor/object_cache.py +107 -0
  215. scaler/worker/agent/processor/processor.py +279 -0
  216. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  217. scaler/worker/agent/processor_holder.py +145 -0
  218. scaler/worker/agent/processor_manager.py +365 -0
  219. scaler/worker/agent/profiling_manager.py +109 -0
  220. scaler/worker/agent/task_manager.py +150 -0
  221. scaler/worker/agent/timeout_manager.py +19 -0
  222. scaler/worker/preload.py +84 -0
  223. scaler/worker/worker.py +264 -0
  224. scaler/worker_adapter/__init__.py +0 -0
  225. scaler/worker_adapter/native.py +154 -0
  226. scaler/worker_adapter/symphony/__init__.py +0 -0
  227. scaler/worker_adapter/symphony/callback.py +45 -0
  228. scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
  229. scaler/worker_adapter/symphony/message.py +24 -0
  230. scaler/worker_adapter/symphony/task_manager.py +288 -0
  231. scaler/worker_adapter/symphony/worker.py +205 -0
  232. scaler/worker_adapter/symphony/worker_adapter.py +142 -0
@@ -0,0 +1,47 @@
1
+ import dataclasses
2
+ from typing import Optional, Tuple
3
+
4
+ from scaler.config.types.object_storage_server import ObjectStorageConfig
5
+ from scaler.config.types.worker import WorkerCapabilities
6
+ from scaler.config.types.zmq import ZMQConfig
7
+ from scaler.config import defaults
8
+ from scaler.utility.logging.utility import LoggingLevel
9
+
10
+
11
@dataclasses.dataclass
class SymphonyWorkerConfig:
    """Settings of the Symphony worker adapter section.

    ``scheduler_address``, ``object_storage_address`` and ``service_name`` are required; every other field
    falls back to a default. Numeric sizing fields, the service name and the logging level are validated in
    ``__post_init__``.
    """

    scheduler_address: ZMQConfig
    object_storage_address: Optional[ObjectStorageConfig]
    service_name: str
    base_concurrency: int = defaults.DEFAULT_NUMBER_OF_WORKER
    worker_capabilities: WorkerCapabilities = dataclasses.field(
        default_factory=lambda: WorkerCapabilities.from_string("")
    )
    server_http_host: str = "localhost"
    server_http_port: int = 0
    io_threads: int = defaults.DEFAULT_IO_THREADS
    worker_task_queue_size: int = defaults.DEFAULT_PER_WORKER_QUEUE_SIZE
    heartbeat_interval: int = defaults.DEFAULT_HEARTBEAT_INTERVAL_SECONDS
    death_timeout_seconds: int = defaults.DEFAULT_WORKER_DEATH_TIMEOUT
    event_loop: str = "builtin"
    logging_paths: Tuple[str, ...] = defaults.DEFAULT_LOGGING_PATHS
    logging_level: str = defaults.DEFAULT_LOGGING_LEVEL
    logging_config_file: Optional[str] = None

    def __post_init__(self):
        """Reject non-positive sizing values, an empty service name and unknown logging levels.

        Raises:
            ValueError: if any of the checks above fails.
        """
        # Checked in this order so the first offending value short-circuits the scan.
        must_be_positive = (
            self.base_concurrency,
            self.worker_task_queue_size,
            self.heartbeat_interval,
            self.death_timeout_seconds,
            self.io_threads,
        )
        if any(quantity <= 0 for quantity in must_be_positive):
            raise ValueError("All concurrency, queue size, timeout, and thread count values must be positive integers.")

        if not self.service_name:
            raise ValueError("service_name cannot be an empty string.")

        # The level is compared case-insensitively against the LoggingLevel member names.
        valid_levels = {member.name for member in LoggingLevel}
        if self.logging_level.upper() in valid_levels:
            return

        raise ValueError(f"logging_level must be one of {valid_levels}, but got '{self.logging_level}'")
@@ -0,0 +1,13 @@
1
+ import dataclasses
2
+
3
+ from scaler.config.types.zmq import ZMQConfig
4
+
5
+
6
@dataclasses.dataclass
class TopConfig:
    """Settings of the ``top`` section: the monitor address to use and a timeout (default 5)."""

    monitor_address: ZMQConfig
    timeout: int = 5

    def __post_init__(self):
        """Validate the timeout.

        Raises:
            ValueError: if ``timeout`` is zero or negative.
        """
        timeout_is_invalid = self.timeout <= 0
        if timeout_is_invalid:
            raise ValueError("timeout must be a positive integer.")
@@ -0,0 +1,16 @@
1
+ import dataclasses
2
+
3
+ from scaler.config.types.zmq import ZMQConfig
4
+
5
+
6
@dataclasses.dataclass
class WebUIConfig:
    """Settings of the web UI section: the monitor address plus the host/port of the web server."""

    monitor_address: ZMQConfig
    web_host: str = "0.0.0.0"
    web_port: int = 50001

    def __post_init__(self):
        """Type-check the web host and port.

        Raises:
            TypeError: if ``web_host`` is not a ``str`` or ``web_port`` is not an ``int``.
        """
        host, port = self.web_host, self.web_port

        host_is_text = isinstance(host, str)
        if not host_is_text:
            raise TypeError(f"Web host should be string, given {self.web_host}")

        port_is_integer = isinstance(port, int)
        if not port_is_integer:
            raise TypeError(f"Web port should be an integer, given {self.web_port}")
File without changes
@@ -0,0 +1,45 @@
1
+ import dataclasses
2
+ import ipaddress
3
+
4
+ from scaler.config.mixins import ConfigType
5
+
6
+
7
@dataclasses.dataclass
class ObjectStorageConfig(ConfigType):
    """Address of an object storage server, expressed as an IP address, a port and an identity.

    The textual form is ``tcp://<ip_address>:<port>``; the identity is not part of that string.
    """

    host: str
    port: int
    identity: str = "ObjectStorageServer"

    def __post_init__(self):
        """Validate the fields.

        Raises:
            TypeError: if ``host`` is not a valid IP address, ``identity`` is not a ``str`` or ``port`` is not
                an ``int``.
        """
        try:
            ipaddress.ip_address(self.host)
        except ValueError as err:
            # TypeError is kept for backward compatibility; the original parsing error is chained as the cause.
            raise TypeError(f"Host must be a valid IP address, but got '{self.host}'") from err

        if not isinstance(self.identity, str):
            raise TypeError(f"Identity should be a string, given {self.identity}")

        if not isinstance(self.port, int):
            raise TypeError(f"Port should be an integer, given {self.port}")

    def __str__(self) -> str:
        return self.to_string()

    def to_string(self) -> str:
        """Return the address as ``tcp://<host>:<port>``."""
        return f"tcp://{self.host}:{self.port}"

    @classmethod
    def from_string(cls, value: str) -> "ObjectStorageConfig":
        """Build a config from a ``tcp://<ip_address>:<port>`` string.

        The identity keeps its default value.

        Raises:
            ValueError: if the prefix is not ``tcp://``, the port is missing or not an integer, or the host is
                not a valid IP address.
        """
        prefix = "tcp://"
        if not value.startswith(prefix):
            raise ValueError("Address must start with 'tcp://'")

        try:
            # Split on the LAST colon so that only the trailing component is treated as the port.
            host, port_str = value[len(prefix):].rsplit(":", 1)
            port = int(port_str)

            ipaddress.ip_address(host)

        except (ValueError, IndexError) as err:
            raise ValueError(
                f"Invalid address format '{value}'. Expected format is tcp://<ip_address>:<port>"
            ) from err

        return cls(host=host, port=port)
@@ -0,0 +1,57 @@
1
+ import dataclasses
2
+ from typing import List, Dict
3
+ from typing_extensions import Self
4
+
5
+ from scaler.config.mixins import ConfigType
6
+
7
+
8
@dataclasses.dataclass
class WorkerNames(ConfigType):
    """List of worker names that can be read from, and rendered back to, a comma-separated string."""

    names: List[str]

    @classmethod
    def from_string(cls, value: str) -> Self:
        """Split ``value`` on commas and strip surrounding whitespace from each piece.

        An empty ``value`` produces an empty list of names.
        """
        stripped_names = list(map(str.strip, value.split(","))) if value else []
        return cls(stripped_names)

    def __str__(self) -> str:
        separator = ","
        return separator.join(self.names)

    def __len__(self) -> int:
        # Number of names held.
        name_count = len(self.names)
        return name_count
26
+
27
+
28
@dataclasses.dataclass
class WorkerCapabilities(ConfigType):
    """Mapping of capability name to an integer amount, parsed from a comma-separated string.

    Each item is either ``name`` or ``name=<int>``; a bare ``name`` is stored with the value ``-1``.
    """

    capabilities: Dict[str, int]

    @classmethod
    def from_string(cls, value: str) -> Self:
        """Parse a string such as ``"linux,cpu=4"`` into a capabilities mapping.

        An empty ``value`` yields an empty mapping.

        Raises:
            ValueError: if the text after ``=`` cannot be converted to an integer.
        """
        capabilities: Dict[str, int] = {}
        if not value:
            return cls(capabilities)

        for item in value.split(","):
            # A dedicated local is used for the right-hand side so the `value` parameter is never rebound.
            name, _, raw_amount = item.partition("=")
            key = name.strip()

            if raw_amount == "":
                # No explicit amount given: -1 marks a capability without a value.
                capabilities[key] = -1
                continue

            try:
                capabilities[key] = int(raw_amount)
            except ValueError as err:
                raise ValueError(
                    f"Invalid capability value for '{name}'. Expected an integer, but got '{raw_amount}'."
                ) from err

        return cls(capabilities)

    def __str__(self) -> str:
        """Render the mapping back to the ``name`` / ``name=<int>`` comma-separated form."""
        items = []
        for name, cap in self.capabilities.items():
            if cap == -1:
                items.append(name)
            else:
                items.append(f"{name}={cap}")
        return ",".join(items)
@@ -0,0 +1,79 @@
1
+ import dataclasses
2
+ import enum
3
+ from typing import Optional
4
+
5
+ from typing_extensions import Self
6
+ from scaler.config.mixins import ConfigType
7
+
8
+
9
class ZMQType(enum.Enum):
    """Transport schemes accepted in a ZMQ address."""

    inproc = "inproc"
    ipc = "ipc"
    tcp = "tcp"

    @staticmethod
    def allowed_types():
        """Return the set of scheme strings, one per enum member."""
        scheme_names = set()
        for member in ZMQType:
            scheme_names.add(member.value)
        return scheme_names
17
+
18
+
19
@dataclasses.dataclass
class ZMQConfig(ConfigType):
    """A ZMQ endpoint: transport type, host (or endpoint name for ``inproc``/``ipc``) and optional port.

    ``tcp`` endpoints must carry a port; ``inproc`` and ``ipc`` endpoints must not.
    """

    type: ZMQType
    host: str
    port: Optional[int] = None

    def __post_init__(self):
        """Validate the field types and the type/port combination.

        Raises:
            TypeError: if ``type`` is not a ``ZMQType``, ``host`` is not a ``str`` or a given ``port`` is not
                an ``int``.
            ValueError: if a ``tcp`` endpoint has no port, or an ``inproc``/``ipc`` endpoint has one.
        """
        if not isinstance(self.type, ZMQType):
            raise TypeError(f"Invalid zmq type {self.type}, available types are: {ZMQType.allowed_types()}")

        if not isinstance(self.host, str):
            raise TypeError(f"Host should be string, given {self.host}")

        if self.port is None:
            if self.type == ZMQType.tcp:
                raise ValueError(f"type {self.type.value} should have `port`")
        else:
            if self.type in {ZMQType.inproc, ZMQType.ipc}:
                raise ValueError(f"type {self.type.value} should not have `port`")

            if not isinstance(self.port, int):
                raise TypeError(f"Port should be integer, given {self.port}")

    def to_address(self):
        """Return the endpoint as ``tcp://host:port`` or ``<type>://host``."""
        if self.type == ZMQType.tcp:
            return f"tcp://{self.host}:{self.port}"

        if self.type in {ZMQType.inproc, ZMQType.ipc}:
            return f"{self.type.value}://{self.host}"

        raise TypeError(f"Unsupported ZMQ type: {self.type}")

    @classmethod
    def from_string(cls, value: str) -> Self:
        """Parse an address such as ``tcp://127.0.0.1:12345`` or ``ipc:///tmp/socket``.

        For ``tcp`` the port is whatever follows the LAST colon, so hosts that themselves contain colons
        (for example a bracketed IPv6 literal) are accepted.

        Raises:
            ValueError: if the scheme separator is missing, the scheme is unsupported, a ``tcp`` address has
                no port, or the port is not an integer.
        """
        if "://" not in value:
            raise ValueError("valid ZMQ config should be like tcp://127.0.0.1:12345")

        socket_type, host_port = value.split("://", 1)
        if socket_type not in ZMQType.allowed_types():
            raise ValueError(f"supported ZMQ types are: {ZMQType.allowed_types()}")

        socket_type_enum = ZMQType(socket_type)
        if socket_type_enum in {ZMQType.inproc, ZMQType.ipc}:
            host = host_port
            port_int = None
        elif socket_type_enum == ZMQType.tcp:
            # rpartition never raises: a missing colon is reported explicitly instead of surfacing as an
            # opaque tuple-unpacking error, and extra colons stay in the host part.
            host, separator, port = host_port.rpartition(":")
            if not separator:
                raise ValueError(f"tcp address should be like tcp://127.0.0.1:12345, but got '{value}'")
            try:
                port_int = int(port)
            except ValueError as err:
                raise ValueError(f"cannot convert '{port}' to port number") from err
        else:
            # Defensive: unreachable while every ZMQType member is handled above.
            raise ValueError(f"Unsupported ZMQ type: {socket_type}")

        return cls(socket_type_enum, host, port_int)

    def __str__(self) -> str:
        return self.to_address()

    def __repr__(self) -> str:
        return self.to_address()
File without changes
@@ -0,0 +1,133 @@
1
+ import argparse
2
+ import socket
3
+
4
+ from scaler.cluster.cluster import Cluster
5
+ from scaler.config.section.cluster import ClusterConfig
6
+ from scaler.config.loader import load_config
7
+ from scaler.utility.event_loop import EventLoopType, register_event_loop
8
+
9
+
10
def get_args():
    """Declare and parse the command-line options of the standalone cluster entry point.

    Returns:
        argparse.Namespace: the values parsed from the process command line.
    """
    cli = argparse.ArgumentParser(
        prog="standalone compute cluster", formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    option = cli.add_argument

    # --- configuration file and processor preload -------------------------------------------------------
    option("--config", "-c", type=str, default=None, help="Path to the TOML configuration file.")
    option(
        "--preload",
        type=str,
        default=None,
        help='optional module init in the form "pkg.mod:func(arg1, arg2)" executed in each processor before tasks',
    )

    # --- worker sizing and naming -----------------------------------------------------------------------
    option("--num-of-workers", "-n", type=int, help="number of workers in cluster")
    option(
        "--worker-names",
        "-wn",
        type=str,
        help="worker names to replace default worker names (host names), separate by comma",
    )
    option(
        "--per-worker-capabilities",
        "-pwc",
        type=str,
        help='comma-separated capabilities provided by the workers (e.g. "-pwc linux,cpu=4")',
    )
    option("--per-worker-task-queue-size", "-wtqs", type=int, help="specify per worker queue size")

    # --- intervals, timeouts and thresholds -------------------------------------------------------------
    option("--heartbeat-interval-seconds", "-hi", type=int, help="number of seconds to send heartbeat interval")
    option(
        "--task-timeout-seconds",
        "-tts",
        type=int,
        help="number of seconds task treat as timeout and return an exception",
    )
    option("--garbage-collect-interval-seconds", "-gc", type=int, help="garbage collect interval seconds")
    option("--death-timeout-seconds", "-ds", type=int, help="death timeout seconds")
    option(
        "--trim-memory-threshold-bytes", "-tm", type=int, help="number of bytes threshold to enable libc to trim memory"
    )

    # --- runtime behaviour ------------------------------------------------------------------------------
    option("--event-loop", "-el", choices=EventLoopType.allowed_types(), help="select event loop type")
    option("--io-threads", "-it", type=int, help="specify number of io threads per worker")
    hard_suspend_help = (
        "When set, suspends worker processors using the SIGTSTP signal instead of a synchronization event, "
        "fully halting computation on suspended tasks. Note that this may cause some tasks to fail if they "
        "do not support being paused at the OS level (e.g. tasks requiring active network connections)."
    )
    option("--hard-processor-suspend", "-hps", action="store_true", help=hard_suspend_help)

    # --- logging ----------------------------------------------------------------------------------------
    option("--log-hub-address", "-la", type=str, help="address for Worker send logs")
    option(
        "--logging-paths",
        "-lp",
        nargs="*",
        type=str,
        help='specify where cluster log should logged to, it can be multiple paths, "/dev/stdout" is default for '
        "standard output, each worker will have its own log file with process id appended to the path",
    )
    option(
        "--logging-level",
        "-ll",
        type=str,
        choices=("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"),
        help="specify the logging level",
    )
    option(
        "--logging-config-file",
        "-lcf",
        type=str,
        help="use standard python the .conf file the specify python logging file configuration format, this will "
        "bypass --logging-paths and --logging-level at the same time, and this will not work on per worker logging",
    )

    # --- addresses --------------------------------------------------------------------------------------
    option(
        "--object-storage-address",
        "-osa",
        type=str,
        help="specify the object storage server address, e.g. tcp://localhost:2346. If not specified, use the address "
        "provided by the scheduler",
    )
    option("scheduler_address", nargs="?", type=str, help="scheduler address to connect to")

    parsed = cli.parse_args()
    return parsed
95
+
96
+
97
def main():
    """Entry point of the standalone cluster: load the configuration, resolve worker names, run the cluster.

    Raises:
        ValueError: if the number of worker names does not match the configured number of workers.
    """
    args = get_args()

    cluster_config = load_config(ClusterConfig, args.config, args, section_name="cluster")

    register_event_loop(cluster_config.event_loop)

    worker_names = cluster_config.worker_names.names
    if not worker_names:
        # No names configured: every worker is named after the short host name (the part before the first
        # dot). The host name is looked up once rather than once per worker.
        short_hostname = socket.gethostname().split(".")[0]
        worker_names = [short_hostname for _ in range(cluster_config.num_of_workers)]

    if len(worker_names) != cluster_config.num_of_workers:
        raise ValueError(
            f"Number of worker names ({len(worker_names)}) must match the number of workers "
            f"({cluster_config.num_of_workers})."
        )

    cluster = Cluster(
        address=cluster_config.scheduler_address,
        storage_address=cluster_config.storage_address,
        preload=cluster_config.preload,
        worker_names=worker_names,
        per_worker_capabilities=cluster_config.per_worker_capabilities.capabilities,
        per_worker_task_queue_size=cluster_config.per_worker_task_queue_size,
        heartbeat_interval_seconds=cluster_config.heartbeat_interval_seconds,
        task_timeout_seconds=cluster_config.task_timeout_seconds,
        garbage_collect_interval_seconds=cluster_config.garbage_collect_interval_seconds,
        trim_memory_threshold_bytes=cluster_config.trim_memory_threshold_bytes,
        death_timeout_seconds=cluster_config.death_timeout_seconds,
        hard_processor_suspend=cluster_config.hard_processor_suspend,
        event_loop=cluster_config.event_loop,
        worker_io_threads=cluster_config.worker_io_threads,
        logging_paths=cluster_config.logging_paths,
        logging_level=cluster_config.logging_level,
        logging_config_file=cluster_config.logging_config_file,
    )
    cluster.run()
@@ -0,0 +1,41 @@
1
+ import argparse
2
+ import logging
3
+
4
+ from scaler.config.loader import load_config
5
+ from scaler.config.section.object_storage_server import ObjectStorageServerConfig
6
+ from scaler.object_storage.object_storage_server import ObjectStorageServer
7
+ from scaler.utility.logging.utility import get_logger_info, setup_logger
8
+
9
+
10
def get_args():
    """Declare and parse the command-line options of the object storage server entry point.

    Returns:
        argparse.Namespace: holds ``config`` and ``object_storage_address``; both are ``None`` when omitted.
    """
    cli = argparse.ArgumentParser(
        prog="scaler_object_storage_server", formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    config_help = "Path to the TOML configuration file."
    cli.add_argument("--config", "-c", type=str, default=None, help=config_help)

    # Optional positional: the address may be given on the command line or omitted.
    address_help = "specify the object storage server address to listen to, e.g. tcp://localhost:2345."
    cli.add_argument("object_storage_address", nargs="?", type=str, help=address_help)

    parsed = cli.parse_args()
    return parsed
23
+
24
+
25
def main():
    """Entry point of the object storage server: load the configuration, set up logging, run the server."""
    cli_args = get_args()
    oss_config = load_config(ObjectStorageServerConfig, cli_args.config, cli_args, section_name="object_storage_server")

    setup_logger()

    # The format, level and paths of the root logger are handed to the server.
    root_logger = logging.getLogger()
    log_format_str, log_level_str, log_paths = get_logger_info(root_logger)

    server = ObjectStorageServer()
    listen_address = oss_config.object_storage_address
    server.run(
        listen_address.host,
        listen_address.port,
        listen_address.identity,
        log_level_str,
        log_format_str,
        log_paths,
    )
@@ -0,0 +1,135 @@
1
+ import argparse
2
+
3
+ from scaler.config.loader import load_config
4
+ from scaler.cluster.object_storage_server import ObjectStorageServerProcess
5
+ from scaler.cluster.scheduler import SchedulerProcess
6
+ from scaler.scheduler.allocate_policy.allocate_policy import AllocatePolicy
7
+ from scaler.config.section.scheduler import SchedulerConfig
8
+ from scaler.config.types.object_storage_server import ObjectStorageConfig
9
+ from scaler.utility.event_loop import EventLoopType
10
+ from scaler.utility.network_util import get_available_tcp_port
11
+
12
+
13
def get_args():
    """Declare and parse the command-line options of the scheduler entry point.

    Returns:
        argparse.Namespace: the values parsed from the process command line.
    """
    parser = argparse.ArgumentParser("scaler_scheduler", formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("--config", "-c", type=str, default=None, help="Path to the TOML configuration file.")
    parser.add_argument("--io-threads", type=int, help="number of io threads for zmq")
    parser.add_argument(
        "--max-number-of-tasks-waiting",
        "-mt",
        type=int,
        help="max number of tasks can wait in scheduler while all workers are full",
    )
    parser.add_argument("--client-timeout-seconds", "-ct", type=int, help="discard client when timeout seconds reached")
    parser.add_argument("--worker-timeout-seconds", "-wt", type=int, help="discard worker when timeout seconds reached")
    parser.add_argument(
        "--object-retention-seconds", "-ot", type=int, help="discard function in scheduler when timeout seconds reached"
    )
    parser.add_argument(
        "--load-balance-seconds", "-ls", type=int, help="number of seconds for load balance operation in scheduler"
    )
    parser.add_argument(
        "--load-balance-trigger-times",
        "-lbt",
        type=int,
        help="exact number of repeated load balance advices when trigger load balance operation in scheduler",
    )
    parser.add_argument("--event-loop", "-e", choices=EventLoopType.allowed_types(), help="select event loop type")
    parser.add_argument(
        "--protected", "-p", action="store_true", help="protect scheduler and worker from being shutdown by client"
    )
    parser.add_argument(
        "--allocate-policy",
        "-ap",
        choices=[p.name for p in AllocatePolicy],
        help="specify allocate policy, this controls how scheduler will prioritize tasks, including balancing tasks",
    )
    parser.add_argument(
        "--logging-paths",
        "-lp",
        nargs="*",
        type=str,
        help="specify where scheduler log should logged to, it can accept multiple files, default is /dev/stdout",
    )
    parser.add_argument("--logging-level", "-ll", type=str, help="specify the logging level")
    parser.add_argument(
        "--logging-config-file",
        "-lc",
        type=str,
        # The option this bypasses is spelled --logging-paths (plural), matching the flag declared above.
        help="use standard python the .conf file the specify python logging file configuration format, this will "
        "bypass --logging-paths",
    )
    parser.add_argument(
        "--object-storage-address",
        "-osa",
        type=str,
        # The entry point starts its own storage server on an available port when no address is given, so
        # the help describes that rather than a fixed "scheduler port plus 1" rule.
        help="specify the object storage server address, e.g.: tcp://127.0.0.1:2346, if not specified, an object "
        "storage server is started on the scheduler host using an available TCP port",
    )
    parser.add_argument(
        "--monitor-address",
        "-ma",
        type=str,
        help="specify monitoring address, if not specified, the monitoring address is scheduler address with port "
        "number plus 2, e.g.: if scheduler address is tcp://localhost:2345, then monitoring address is "
        "tcp://localhost:2347",
    )
    parser.add_argument(
        "--adapter-webhook-url",
        "-awu",
        type=str,
        help="specify the adapter webhook url, if not specified, the adapter will not be used",
    )
    parser.add_argument(
        "scheduler_address", nargs="?", type=str, help="scheduler address to connect to, e.g.: `tcp://localhost:6378`"
    )
    return parser.parse_args()
89
+
90
+
91
def main():
    """Entry point of the scheduler: load the configuration, optionally start a storage server, run the scheduler.

    When the merged configuration carries no object storage address, an object storage server process is
    started on the scheduler host using an available TCP port, and the scheduler is pointed at it. Otherwise
    the configured address is used and no storage server is started here.
    """
    args = get_args()

    scheduler_config = load_config(SchedulerConfig, args.config, args, section_name="scheduler")

    # Decide on the merged configuration rather than on the raw command-line value, so that an address
    # supplied only through the configuration file is honoured instead of being silently replaced.
    object_storage_address = scheduler_config.object_storage_address
    object_storage = None

    if object_storage_address is None:
        object_storage_address = ObjectStorageConfig(
            host=scheduler_config.scheduler_address.host, port=get_available_tcp_port()
        )
        object_storage = ObjectStorageServerProcess(
            storage_address=object_storage_address,
            logging_paths=scheduler_config.logging_paths,
            logging_config_file=scheduler_config.logging_config_file,
            logging_level=scheduler_config.logging_level,
        )
        object_storage.start()
        object_storage.wait_until_ready()  # object storage should be ready before starting the cluster

    scheduler = SchedulerProcess(
        address=scheduler_config.scheduler_address,
        storage_address=object_storage_address,
        monitor_address=scheduler_config.monitor_address,
        adapter_webhook_url=scheduler_config.adapter_webhook_url,
        io_threads=scheduler_config.io_threads,
        max_number_of_tasks_waiting=scheduler_config.max_number_of_tasks_waiting,
        client_timeout_seconds=scheduler_config.client_timeout_seconds,
        worker_timeout_seconds=scheduler_config.worker_timeout_seconds,
        object_retention_seconds=scheduler_config.object_retention_seconds,
        load_balance_seconds=scheduler_config.load_balance_seconds,
        load_balance_trigger_times=scheduler_config.load_balance_trigger_times,
        protected=scheduler_config.protected,
        allocate_policy=scheduler_config.allocate_policy,
        event_loop=scheduler_config.event_loop,
        logging_paths=scheduler_config.logging_paths,
        logging_config_file=scheduler_config.logging_config_file,
        logging_level=scheduler_config.logging_level,
    )
    scheduler.start()

    scheduler.join()
    if object_storage is not None:
        object_storage.join()