opengris-scaler 1.12.28__cp313-cp313-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of opengris-scaler might be problematic.

Files changed (187)
  1. opengris_scaler-1.12.28.dist-info/METADATA +728 -0
  2. opengris_scaler-1.12.28.dist-info/RECORD +187 -0
  3. opengris_scaler-1.12.28.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.28.dist-info/entry_points.txt +10 -0
  5. opengris_scaler-1.12.28.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.28.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.28.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/__init__.py +14 -0
  13. scaler/about.py +5 -0
  14. scaler/client/__init__.py +0 -0
  15. scaler/client/agent/__init__.py +0 -0
  16. scaler/client/agent/client_agent.py +210 -0
  17. scaler/client/agent/disconnect_manager.py +27 -0
  18. scaler/client/agent/future_manager.py +112 -0
  19. scaler/client/agent/heartbeat_manager.py +74 -0
  20. scaler/client/agent/mixins.py +89 -0
  21. scaler/client/agent/object_manager.py +98 -0
  22. scaler/client/agent/task_manager.py +64 -0
  23. scaler/client/client.py +658 -0
  24. scaler/client/future.py +252 -0
  25. scaler/client/object_buffer.py +129 -0
  26. scaler/client/object_reference.py +25 -0
  27. scaler/client/serializer/__init__.py +0 -0
  28. scaler/client/serializer/default.py +16 -0
  29. scaler/client/serializer/mixins.py +38 -0
  30. scaler/cluster/__init__.py +0 -0
  31. scaler/cluster/cluster.py +115 -0
  32. scaler/cluster/combo.py +150 -0
  33. scaler/cluster/object_storage_server.py +45 -0
  34. scaler/cluster/scheduler.py +86 -0
  35. scaler/config/__init__.py +0 -0
  36. scaler/config/defaults.py +94 -0
  37. scaler/config/loader.py +96 -0
  38. scaler/config/mixins.py +20 -0
  39. scaler/config/section/__init__.py +0 -0
  40. scaler/config/section/cluster.py +55 -0
  41. scaler/config/section/ecs_worker_adapter.py +85 -0
  42. scaler/config/section/native_worker_adapter.py +43 -0
  43. scaler/config/section/object_storage_server.py +8 -0
  44. scaler/config/section/scheduler.py +54 -0
  45. scaler/config/section/symphony_worker_adapter.py +47 -0
  46. scaler/config/section/top.py +13 -0
  47. scaler/config/section/webui.py +21 -0
  48. scaler/config/types/__init__.py +0 -0
  49. scaler/config/types/network_backend.py +12 -0
  50. scaler/config/types/object_storage_server.py +45 -0
  51. scaler/config/types/worker.py +62 -0
  52. scaler/config/types/zmq.py +83 -0
  53. scaler/entry_points/__init__.py +0 -0
  54. scaler/entry_points/cluster.py +133 -0
  55. scaler/entry_points/object_storage_server.py +45 -0
  56. scaler/entry_points/scheduler.py +144 -0
  57. scaler/entry_points/top.py +286 -0
  58. scaler/entry_points/webui.py +48 -0
  59. scaler/entry_points/worker_adapter_ecs.py +191 -0
  60. scaler/entry_points/worker_adapter_native.py +137 -0
  61. scaler/entry_points/worker_adapter_symphony.py +98 -0
  62. scaler/io/__init__.py +0 -0
  63. scaler/io/async_binder.py +89 -0
  64. scaler/io/async_connector.py +95 -0
  65. scaler/io/async_object_storage_connector.py +225 -0
  66. scaler/io/mixins.py +154 -0
  67. scaler/io/sync_connector.py +68 -0
  68. scaler/io/sync_object_storage_connector.py +247 -0
  69. scaler/io/sync_subscriber.py +83 -0
  70. scaler/io/utility.py +80 -0
  71. scaler/io/ymq/__init__.py +0 -0
  72. scaler/io/ymq/_ymq.pyi +95 -0
  73. scaler/io/ymq/ymq.py +138 -0
  74. scaler/io/ymq_async_object_storage_connector.py +184 -0
  75. scaler/io/ymq_sync_object_storage_connector.py +184 -0
  76. scaler/object_storage/__init__.py +0 -0
  77. scaler/protocol/__init__.py +0 -0
  78. scaler/protocol/capnp/__init__.py +0 -0
  79. scaler/protocol/capnp/_python.py +6 -0
  80. scaler/protocol/capnp/common.capnp +68 -0
  81. scaler/protocol/capnp/message.capnp +218 -0
  82. scaler/protocol/capnp/object_storage.capnp +57 -0
  83. scaler/protocol/capnp/status.capnp +73 -0
  84. scaler/protocol/introduction.md +105 -0
  85. scaler/protocol/python/__init__.py +0 -0
  86. scaler/protocol/python/common.py +140 -0
  87. scaler/protocol/python/message.py +751 -0
  88. scaler/protocol/python/mixins.py +13 -0
  89. scaler/protocol/python/object_storage.py +118 -0
  90. scaler/protocol/python/status.py +279 -0
  91. scaler/protocol/worker.md +228 -0
  92. scaler/scheduler/__init__.py +0 -0
  93. scaler/scheduler/allocate_policy/__init__.py +0 -0
  94. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  95. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  96. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  97. scaler/scheduler/allocate_policy/mixins.py +55 -0
  98. scaler/scheduler/controllers/__init__.py +0 -0
  99. scaler/scheduler/controllers/balance_controller.py +65 -0
  100. scaler/scheduler/controllers/client_controller.py +131 -0
  101. scaler/scheduler/controllers/config_controller.py +31 -0
  102. scaler/scheduler/controllers/graph_controller.py +424 -0
  103. scaler/scheduler/controllers/information_controller.py +81 -0
  104. scaler/scheduler/controllers/mixins.py +194 -0
  105. scaler/scheduler/controllers/object_controller.py +147 -0
  106. scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
  107. scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
  108. scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
  109. scaler/scheduler/controllers/scaling_policies/null.py +14 -0
  110. scaler/scheduler/controllers/scaling_policies/types.py +9 -0
  111. scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
  112. scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
  113. scaler/scheduler/controllers/task_controller.py +376 -0
  114. scaler/scheduler/controllers/worker_controller.py +169 -0
  115. scaler/scheduler/object_usage/__init__.py +0 -0
  116. scaler/scheduler/object_usage/object_tracker.py +131 -0
  117. scaler/scheduler/scheduler.py +251 -0
  118. scaler/scheduler/task/__init__.py +0 -0
  119. scaler/scheduler/task/task_state_machine.py +92 -0
  120. scaler/scheduler/task/task_state_manager.py +61 -0
  121. scaler/ui/__init__.py +0 -0
  122. scaler/ui/constants.py +9 -0
  123. scaler/ui/live_display.py +147 -0
  124. scaler/ui/memory_window.py +146 -0
  125. scaler/ui/setting_page.py +40 -0
  126. scaler/ui/task_graph.py +832 -0
  127. scaler/ui/task_log.py +107 -0
  128. scaler/ui/utility.py +66 -0
  129. scaler/ui/webui.py +147 -0
  130. scaler/ui/worker_processors.py +104 -0
  131. scaler/utility/__init__.py +0 -0
  132. scaler/utility/debug.py +19 -0
  133. scaler/utility/event_list.py +63 -0
  134. scaler/utility/event_loop.py +58 -0
  135. scaler/utility/exceptions.py +42 -0
  136. scaler/utility/formatter.py +44 -0
  137. scaler/utility/graph/__init__.py +0 -0
  138. scaler/utility/graph/optimization.py +27 -0
  139. scaler/utility/graph/topological_sorter.py +11 -0
  140. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  141. scaler/utility/identifiers.py +107 -0
  142. scaler/utility/logging/__init__.py +0 -0
  143. scaler/utility/logging/decorators.py +25 -0
  144. scaler/utility/logging/scoped_logger.py +33 -0
  145. scaler/utility/logging/utility.py +183 -0
  146. scaler/utility/many_to_many_dict.py +123 -0
  147. scaler/utility/metadata/__init__.py +0 -0
  148. scaler/utility/metadata/profile_result.py +31 -0
  149. scaler/utility/metadata/task_flags.py +30 -0
  150. scaler/utility/mixins.py +13 -0
  151. scaler/utility/network_util.py +7 -0
  152. scaler/utility/one_to_many_dict.py +72 -0
  153. scaler/utility/queues/__init__.py +0 -0
  154. scaler/utility/queues/async_indexed_queue.py +37 -0
  155. scaler/utility/queues/async_priority_queue.py +70 -0
  156. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  157. scaler/utility/queues/indexed_queue.py +114 -0
  158. scaler/utility/serialization.py +9 -0
  159. scaler/version.txt +1 -0
  160. scaler/worker/__init__.py +0 -0
  161. scaler/worker/agent/__init__.py +0 -0
  162. scaler/worker/agent/heartbeat_manager.py +107 -0
  163. scaler/worker/agent/mixins.py +137 -0
  164. scaler/worker/agent/processor/__init__.py +0 -0
  165. scaler/worker/agent/processor/object_cache.py +107 -0
  166. scaler/worker/agent/processor/processor.py +285 -0
  167. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  168. scaler/worker/agent/processor_holder.py +147 -0
  169. scaler/worker/agent/processor_manager.py +369 -0
  170. scaler/worker/agent/profiling_manager.py +109 -0
  171. scaler/worker/agent/task_manager.py +150 -0
  172. scaler/worker/agent/timeout_manager.py +19 -0
  173. scaler/worker/preload.py +84 -0
  174. scaler/worker/worker.py +265 -0
  175. scaler/worker_adapter/__init__.py +0 -0
  176. scaler/worker_adapter/common.py +26 -0
  177. scaler/worker_adapter/ecs.py +269 -0
  178. scaler/worker_adapter/native.py +155 -0
  179. scaler/worker_adapter/symphony/__init__.py +0 -0
  180. scaler/worker_adapter/symphony/callback.py +45 -0
  181. scaler/worker_adapter/symphony/heartbeat_manager.py +79 -0
  182. scaler/worker_adapter/symphony/message.py +24 -0
  183. scaler/worker_adapter/symphony/task_manager.py +289 -0
  184. scaler/worker_adapter/symphony/worker.py +204 -0
  185. scaler/worker_adapter/symphony/worker_adapter.py +139 -0
  186. src/scaler/io/ymq/_ymq.so +0 -0
  187. src/scaler/object_storage/object_storage_server.so +0 -0
@@ -0,0 +1,8 @@
+ import dataclasses
+
+ from scaler.config.types.object_storage_server import ObjectStorageConfig
+
+
+ @dataclasses.dataclass
+ class ObjectStorageServerConfig:
+     object_storage_address: ObjectStorageConfig
@@ -0,0 +1,54 @@
+ import dataclasses
+ from typing import Optional, Tuple
+ from urllib.parse import urlparse
+
+ from scaler.config import defaults
+ from scaler.config.types.object_storage_server import ObjectStorageConfig
+ from scaler.config.types.zmq import ZMQConfig
+ from scaler.scheduler.allocate_policy.allocate_policy import AllocatePolicy
+ from scaler.scheduler.controllers.scaling_policies.types import ScalingControllerStrategy
+ from scaler.utility.logging.utility import LoggingLevel
+
+
+ @dataclasses.dataclass
+ class SchedulerConfig:
+     scheduler_address: ZMQConfig = dataclasses.field()
+     object_storage_address: Optional[ObjectStorageConfig] = None
+     monitor_address: Optional[ZMQConfig] = None
+     scaling_controller_strategy: ScalingControllerStrategy = ScalingControllerStrategy.NULL
+     adapter_webhook_urls: Tuple[str, ...] = ()
+     protected: bool = True
+     allocate_policy: AllocatePolicy = AllocatePolicy.even
+     event_loop: str = "builtin"
+     io_threads: int = defaults.DEFAULT_IO_THREADS
+     max_number_of_tasks_waiting: int = defaults.DEFAULT_MAX_NUMBER_OF_TASKS_WAITING
+     client_timeout_seconds: int = defaults.DEFAULT_CLIENT_TIMEOUT_SECONDS
+     worker_timeout_seconds: int = defaults.DEFAULT_WORKER_TIMEOUT_SECONDS
+     object_retention_seconds: int = defaults.DEFAULT_OBJECT_RETENTION_SECONDS
+     load_balance_seconds: int = defaults.DEFAULT_LOAD_BALANCE_SECONDS
+     load_balance_trigger_times: int = defaults.DEFAULT_LOAD_BALANCE_TRIGGER_TIMES
+     logging_paths: Tuple[str, ...] = defaults.DEFAULT_LOGGING_PATHS
+     logging_config_file: Optional[str] = None
+     logging_level: str = defaults.DEFAULT_LOGGING_LEVEL
+
+     def __post_init__(self):
+         if self.io_threads <= 0:
+             raise ValueError("io_threads must be a positive integer.")
+         if self.max_number_of_tasks_waiting < -1:
+             raise ValueError("max_number_of_tasks_waiting must be -1 (for unlimited) or non-negative.")
+         if (
+             self.client_timeout_seconds <= 0
+             or self.worker_timeout_seconds <= 0
+             or self.object_retention_seconds <= 0
+             or self.load_balance_seconds <= 0
+         ):
+             raise ValueError("All timeout/retention/balance second values must be positive.")
+         if self.load_balance_trigger_times <= 0:
+             raise ValueError("load_balance_trigger_times must be a positive integer.")
+         for adapter_webhook_url in self.adapter_webhook_urls:
+             parsed_url = urlparse(adapter_webhook_url)
+             if not all([parsed_url.scheme, parsed_url.netloc]):
+                 raise ValueError(f"adapter_webhook_urls contains url '{adapter_webhook_url}' which is not a valid URL.")
+         valid_levels = {level.name for level in LoggingLevel}
+         if self.logging_level.upper() not in valid_levels:
+             raise ValueError(f"logging_level must be one of {valid_levels}, but got '{self.logging_level}'")
@@ -0,0 +1,47 @@
+ import dataclasses
+ from typing import Optional, Tuple
+
+ from scaler.config import defaults
+ from scaler.config.types.object_storage_server import ObjectStorageConfig
+ from scaler.config.types.worker import WorkerCapabilities
+ from scaler.config.types.zmq import ZMQConfig
+ from scaler.utility.logging.utility import LoggingLevel
+
+
+ @dataclasses.dataclass
+ class SymphonyWorkerConfig:
+     scheduler_address: ZMQConfig
+     object_storage_address: Optional[ObjectStorageConfig]
+     service_name: str
+     base_concurrency: int = defaults.DEFAULT_NUMBER_OF_WORKER
+     worker_capabilities: WorkerCapabilities = dataclasses.field(
+         default_factory=lambda: WorkerCapabilities.from_string("")
+     )
+     adapter_web_host: str = "localhost"
+     adapter_web_port: int = 0
+     io_threads: int = defaults.DEFAULT_IO_THREADS
+     worker_task_queue_size: int = defaults.DEFAULT_PER_WORKER_QUEUE_SIZE
+     heartbeat_interval: int = defaults.DEFAULT_HEARTBEAT_INTERVAL_SECONDS
+     death_timeout_seconds: int = defaults.DEFAULT_WORKER_DEATH_TIMEOUT
+     event_loop: str = "builtin"
+     logging_paths: Tuple[str, ...] = defaults.DEFAULT_LOGGING_PATHS
+     logging_level: str = defaults.DEFAULT_LOGGING_LEVEL
+     logging_config_file: Optional[str] = None
+
+     def __post_init__(self):
+         """Validates configuration values after initialization."""
+         if (
+             self.base_concurrency <= 0
+             or self.worker_task_queue_size <= 0
+             or self.heartbeat_interval <= 0
+             or self.death_timeout_seconds <= 0
+             or self.io_threads <= 0
+         ):
+             raise ValueError("All concurrency, queue size, timeout, and thread count values must be positive integers.")
+
+         if not self.service_name:
+             raise ValueError("service_name cannot be an empty string.")
+
+         valid_levels = {level.name for level in LoggingLevel}
+         if self.logging_level.upper() not in valid_levels:
+             raise ValueError(f"logging_level must be one of {valid_levels}, but got '{self.logging_level}'")
@@ -0,0 +1,13 @@
+ import dataclasses
+
+ from scaler.config.types.zmq import ZMQConfig
+
+
+ @dataclasses.dataclass
+ class TopConfig:
+     monitor_address: ZMQConfig
+     timeout: int = 5
+
+     def __post_init__(self):
+         if self.timeout <= 0:
+             raise ValueError("timeout must be a positive integer.")
@@ -0,0 +1,21 @@
+ import dataclasses
+ from typing import Optional, Tuple
+
+ from scaler.config import defaults
+ from scaler.config.types.zmq import ZMQConfig
+
+
+ @dataclasses.dataclass
+ class WebUIConfig:
+     monitor_address: ZMQConfig
+     web_host: str = "0.0.0.0"
+     web_port: int = 50001
+     logging_paths: Tuple[str, ...] = defaults.DEFAULT_LOGGING_PATHS
+     logging_config_file: Optional[str] = None
+     logging_level: str = defaults.DEFAULT_LOGGING_LEVEL
+
+     def __post_init__(self):
+         if not isinstance(self.web_host, str):
+             raise TypeError(f"Web host should be string, given {self.web_host}")
+         if not isinstance(self.web_port, int):
+             raise TypeError(f"Web port should be an integer, given {self.web_port}")
File without changes
@@ -0,0 +1,12 @@
+ import enum
+
+
+ class NetworkBackend(enum.Enum):
+     """
+     Network backend to select when running scaler
+     - tcp_zmq means for oss it use raw tcp, for client/scheduler/worker communication it use zmq
+     - ymq means all components will use ymq for communication
+     """
+
+     tcp_zmq = enum.auto()
+     ymq = enum.auto()
@@ -0,0 +1,45 @@
+ import dataclasses
+ import ipaddress
+
+ from scaler.config.mixins import ConfigType
+
+
+ @dataclasses.dataclass
+ class ObjectStorageConfig(ConfigType):
+     host: str
+     port: int
+     identity: str = "ObjectStorageServer"
+
+     def __post_init__(self):
+         try:
+             ipaddress.ip_address(self.host)
+         except ValueError:
+             raise TypeError(f"Host must be a valid IP address, but got '{self.host}'")
+
+         if not isinstance(self.identity, str):
+             raise TypeError(f"Identity should be a string, given {self.identity}")
+
+         if not isinstance(self.port, int):
+             raise TypeError(f"Port should be an integer, given {self.port}")
+
+     def __str__(self) -> str:
+         return self.to_string()
+
+     def to_string(self) -> str:
+         return f"tcp://{self.host}:{self.port}"
+
+     @classmethod
+     def from_string(cls, value: str) -> "ObjectStorageConfig":
+         if not value.startswith("tcp://"):
+             raise ValueError("Address must start with 'tcp://'")
+
+         try:
+             host, port_str = value[6:].rsplit(":", 1)
+             port = int(port_str)
+
+             ipaddress.ip_address(host)
+
+         except (ValueError, IndexError):
+             raise ValueError(f"Invalid address format '{value}'. Expected format is tcp://<ip_address>:<port>")
+
+         return cls(host=host, port=port)
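A quick sketch (not in the diff) of the round-trip behaviour of from_string/to_string defined above; since the parser calls ipaddress.ip_address(), only literal IP addresses are accepted and hostnames such as localhost are rejected:

# Illustrative only -- exercises the parser defined above.
from scaler.config.types.object_storage_server import ObjectStorageConfig

config = ObjectStorageConfig.from_string("tcp://127.0.0.1:2345")
print(config.host, config.port)  # 127.0.0.1 2345
print(config.to_string())        # tcp://127.0.0.1:2345

try:
    ObjectStorageConfig.from_string("tcp://localhost:2345")  # hostname, not an IP
except ValueError as error:
    print(error)  # Invalid address format 'tcp://localhost:2345'. Expected format is tcp://<ip_address>:<port>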
@@ -0,0 +1,62 @@
+ import dataclasses
+ import sys
+ from typing import Dict, List
+
+ if sys.version_info >= (3, 11):
+     from typing import Self
+ else:
+     from typing_extensions import Self
+
+ from scaler.config.mixins import ConfigType
+
+
+ @dataclasses.dataclass
+ class WorkerNames(ConfigType):
+     """Parses a comma-separated string of worker names into a list."""
+
+     names: List[str]
+
+     @classmethod
+     def from_string(cls, value: str) -> Self:
+         if not value:
+             return cls([])
+         names = [name.strip() for name in value.split(",")]
+         return cls(names)
+
+     def __str__(self) -> str:
+         return ",".join(self.names)
+
+     def __len__(self) -> int:
+         return len(self.names)
+
+
+ @dataclasses.dataclass
+ class WorkerCapabilities(ConfigType):
+     """Parses a string of worker capabilities."""
+
+     capabilities: Dict[str, int]
+
+     @classmethod
+     def from_string(cls, value: str) -> Self:
+         capabilities: Dict[str, int] = {}
+         if not value:
+             return cls(capabilities)
+         for item in value.split(","):
+             name, _, value = item.partition("=")
+             if value != "":
+                 try:
+                     capabilities[name.strip()] = int(value)
+                 except ValueError:
+                     raise ValueError(f"Invalid capability value for '{name}'. Expected an integer, but got '{value}'.")
+             else:
+                 capabilities[name.strip()] = -1
+         return cls(capabilities)
+
+     def __str__(self) -> str:
+         items = []
+         for name, cap in self.capabilities.items():
+             if cap == -1:
+                 items.append(name)
+             else:
+                 items.append(f"{name}={cap}")
+         return ",".join(items)
@@ -0,0 +1,83 @@
+ import dataclasses
+ import enum
+ import sys
+ from typing import Optional
+
+ if sys.version_info >= (3, 11):
+     from typing import Self
+ else:
+     from typing_extensions import Self
+ from scaler.config.mixins import ConfigType
+
+
+ class ZMQType(enum.Enum):
+     inproc = "inproc"
+     ipc = "ipc"
+     tcp = "tcp"
+
+     @staticmethod
+     def allowed_types():
+         return {t.value for t in ZMQType}
+
+
+ @dataclasses.dataclass
+ class ZMQConfig(ConfigType):
+     type: ZMQType
+     host: str
+     port: Optional[int] = None
+
+     def __post_init__(self):
+         if not isinstance(self.type, ZMQType):
+             raise TypeError(f"Invalid zmq type {self.type}, available types are: {ZMQType.allowed_types()}")
+
+         if not isinstance(self.host, str):
+             raise TypeError(f"Host should be string, given {self.host}")
+
+         if self.port is None:
+             if self.type == ZMQType.tcp:
+                 raise ValueError(f"type {self.type.value} should have `port`")
+         else:
+             if self.type in {ZMQType.inproc, ZMQType.ipc}:
+                 raise ValueError(f"type {self.type.value} should not have `port`")
+
+             if not isinstance(self.port, int):
+                 raise TypeError(f"Port should be integer, given {self.port}")
+
+     def to_address(self):
+         if self.type == ZMQType.tcp:
+             return f"tcp://{self.host}:{self.port}"
+
+         if self.type in {ZMQType.inproc, ZMQType.ipc}:
+             return f"{self.type.value}://{self.host}"
+
+         raise TypeError(f"Unsupported ZMQ type: {self.type}")
+
+     @classmethod
+     def from_string(cls, value: str) -> Self:
+         if "://" not in value:
+             raise ValueError("valid ZMQ config should be like tcp://127.0.0.1:12345")
+
+         socket_type, host_port = value.split("://", 1)
+         if socket_type not in ZMQType.allowed_types():
+             raise ValueError(f"supported ZMQ types are: {ZMQType.allowed_types()}")
+
+         socket_type_enum = ZMQType(socket_type)
+         if socket_type_enum in {ZMQType.inproc, ZMQType.ipc}:
+             host = host_port
+             port_int = None
+         elif socket_type_enum == ZMQType.tcp:
+             host, port = host_port.split(":")
+             try:
+                 port_int = int(port)
+             except ValueError:
+                 raise ValueError(f"cannot convert '{port}' to port number")
+         else:
+             raise ValueError(f"Unsupported ZMQ type: {socket_type}")
+
+         return cls(socket_type_enum, host, port_int)
+
+     def __str__(self) -> str:
+         return self.to_address()
+
+     def __repr__(self) -> str:
+         return self.to_address()
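A brief sketch (not in the diff) of the two address forms ZMQConfig.from_string accepts: tcp addresses carry a port, while inproc/ipc addresses must not:

# Illustrative only -- exercises ZMQConfig.from_string defined above.
from scaler.config.types.zmq import ZMQConfig, ZMQType

tcp = ZMQConfig.from_string("tcp://127.0.0.1:2345")
print(tcp.type is ZMQType.tcp, tcp.port)  # True 2345
print(tcp.to_address())                   # tcp://127.0.0.1:2345

ipc = ZMQConfig.from_string("ipc:///tmp/scaler.sock")
print(ipc.type is ZMQType.ipc, ipc.port)  # True None
print(ipc.to_address())                   # ipc:///tmp/scaler.sock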
File without changes
@@ -0,0 +1,133 @@
+ import argparse
+ import socket
+
+ from scaler.cluster.cluster import Cluster
+ from scaler.config.loader import load_config
+ from scaler.config.section.cluster import ClusterConfig
+ from scaler.utility.event_loop import EventLoopType, register_event_loop
+
+
+ def get_args():
+     parser = argparse.ArgumentParser(
+         "standalone compute cluster", formatter_class=argparse.ArgumentDefaultsHelpFormatter
+     )
+     parser.add_argument("--config", "-c", type=str, default=None, help="Path to the TOML configuration file.")
+
+     parser.add_argument(
+         "--preload",
+         type=str,
+         default=None,
+         help='optional module init in the form "pkg.mod:func(arg1, arg2)" executed in each processor before tasks',
+     )
+     parser.add_argument("--num-of-workers", "-n", type=int, help="number of workers in cluster")
+     parser.add_argument(
+         "--worker-names",
+         "-wn",
+         type=str,
+         help="worker names to replace default worker names (host names), separate by comma",
+     )
+     parser.add_argument(
+         "--per-worker-capabilities",
+         "-pwc",
+         type=str,
+         help='comma-separated capabilities provided by the workers (e.g. "-pwc linux,cpu=4")',
+     )
+     parser.add_argument("--per-worker-task-queue-size", "-wtqs", type=int, help="specify per worker queue size")
+     parser.add_argument(
+         "--heartbeat-interval-seconds", "-hi", type=int, help="number of seconds to send heartbeat interval"
+     )
+     parser.add_argument(
+         "--task-timeout-seconds",
+         "-tts",
+         type=int,
+         help="number of seconds task treat as timeout and return an exception",
+     )
+     parser.add_argument("--garbage-collect-interval-seconds", "-gc", type=int, help="garbage collect interval seconds")
+     parser.add_argument("--death-timeout-seconds", "-ds", type=int, help="death timeout seconds")
+     parser.add_argument(
+         "--trim-memory-threshold-bytes", "-tm", type=int, help="number of bytes threshold to enable libc to trim memory"
+     )
+     parser.add_argument("--event-loop", "-el", choices=EventLoopType.allowed_types(), help="select event loop type")
+     parser.add_argument("--worker-io-threads", "-wit", type=int, help="specify number of io threads per worker")
+     parser.add_argument(
+         "--hard-processor-suspend",
+         "-hps",
+         action="store_true",
+         help=(
+             "When set, suspends worker processors using the SIGTSTP signal instead of a synchronization event, "
+             "fully halting computation on suspended tasks. Note that this may cause some tasks to fail if they "
+             "do not support being paused at the OS level (e.g. tasks requiring active network connections)."
+         ),
+     )
+     parser.add_argument("--log-hub-address", "-la", type=str, help="address for Worker send logs")
+     parser.add_argument(
+         "--logging-paths",
+         "-lp",
+         nargs="*",
+         type=str,
+         help='specify where cluster log should logged to, it can be multiple paths, "/dev/stdout" is default for '
+         "standard output, each worker will have its own log file with process id appended to the path",
+     )
+     parser.add_argument(
+         "--logging-level",
+         "-ll",
+         type=str,
+         choices=("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"),
+         help="specify the logging level",
+     )
+     parser.add_argument(
+         "--logging-config-file",
+         "-lcf",
+         type=str,
+         help="use standard python the .conf file the specify python logging file configuration format, this will "
+         "bypass --logging-paths and --logging-level at the same time, and this will not work on per worker logging",
+     )
+     parser.add_argument(
+         "--object-storage-address",
+         "-osa",
+         type=str,
+         help="specify the object storage server address, e.g. tcp://localhost:2346. If not specified, use the address "
+         "provided by the scheduler",
+     )
+     parser.add_argument("scheduler_address", nargs="?", type=str, help="scheduler address to connect to")
+
+     return parser.parse_args()
+
+
+ def main():
+     args = get_args()
+
+     cluster_config = load_config(ClusterConfig, args.config, args, section_name="cluster")
+
+     register_event_loop(cluster_config.event_loop)
+
+     worker_names = cluster_config.worker_names.names
+     if not worker_names:
+         worker_names = [f"{socket.gethostname().split('.')[0]}" for _ in range(cluster_config.num_of_workers)]
+
+     if len(worker_names) != cluster_config.num_of_workers:
+         raise ValueError(
+             f"Number of worker names ({len(worker_names)}) must match the number of workers "
+             f"({cluster_config.num_of_workers})."
+         )
+
+     cluster = Cluster(
+         address=cluster_config.scheduler_address,
+         object_storage_address=cluster_config.object_storage_address,
+         preload=cluster_config.preload,
+         worker_names=worker_names,
+         per_worker_capabilities=cluster_config.per_worker_capabilities.capabilities,
+         per_worker_task_queue_size=cluster_config.per_worker_task_queue_size,
+         heartbeat_interval_seconds=cluster_config.heartbeat_interval_seconds,
+         task_timeout_seconds=cluster_config.task_timeout_seconds,
+         garbage_collect_interval_seconds=cluster_config.garbage_collect_interval_seconds,
+         trim_memory_threshold_bytes=cluster_config.trim_memory_threshold_bytes,
+         death_timeout_seconds=cluster_config.death_timeout_seconds,
+         hard_processor_suspend=cluster_config.hard_processor_suspend,
+         event_loop=cluster_config.event_loop,
+         worker_io_threads=cluster_config.worker_io_threads,
+         logging_paths=cluster_config.logging_paths,
+         logging_level=cluster_config.logging_level,
+         logging_config_file=cluster_config.logging_config_file,
+     )
+     cluster.run()
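As a hypothetical illustration (not from the package) of the worker-name handling in main() above: when --worker-names is empty, one hostname-derived name is generated per worker, and an explicit list must match --num-of-workers:

# Hypothetical illustration of the worker-name defaulting done in main() above.
import socket

from scaler.config.types.worker import WorkerNames

num_of_workers = 3
worker_names = WorkerNames.from_string("").names  # an empty --worker-names gives []
if not worker_names:
    # same fallback as main(): repeat the short hostname once per worker
    worker_names = [socket.gethostname().split(".")[0] for _ in range(num_of_workers)]

assert len(worker_names) == num_of_workers  # otherwise main() raises ValueError
print(worker_names)  # e.g. ['myhost', 'myhost', 'myhost']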
@@ -0,0 +1,45 @@
+ import argparse
+ import logging
+ import sys
+
+ from scaler.config.loader import load_config
+ from scaler.config.section.object_storage_server import ObjectStorageServerConfig
+ from scaler.object_storage.object_storage_server import ObjectStorageServer
+ from scaler.utility.logging.utility import get_logger_info, setup_logger
+
+
+ def get_args():
+     parser = argparse.ArgumentParser(
+         "scaler_object_storage_server", formatter_class=argparse.ArgumentDefaultsHelpFormatter
+     )
+     parser.add_argument("--config", "-c", type=str, default=None, help="Path to the TOML configuration file.")
+
+     parser.add_argument(
+         "object_storage_address",
+         nargs="?",
+         type=str,
+         help="specify the object storage server address to listen to, e.g. tcp://localhost:2345.",
+     )
+     return parser.parse_args()
+
+
+ def main():
+     args = get_args()
+
+     oss_config = load_config(ObjectStorageServerConfig, args.config, args, section_name="object_storage_server")
+
+     setup_logger()
+
+     log_format_str, log_level_str, log_paths = get_logger_info(logging.getLogger())
+
+     try:
+         ObjectStorageServer().run(
+             oss_config.object_storage_address.host,
+             oss_config.object_storage_address.port,
+             oss_config.object_storage_address.identity,
+             log_level_str,
+             log_format_str,
+             log_paths,
+         )
+     except KeyboardInterrupt:
+         sys.exit(0)