opengris-scaler 1.12.37__cp38-cp38-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. opengris_scaler-1.12.37.dist-info/METADATA +730 -0
  2. opengris_scaler-1.12.37.dist-info/RECORD +196 -0
  3. opengris_scaler-1.12.37.dist-info/WHEEL +5 -0
  4. opengris_scaler-1.12.37.dist-info/entry_points.txt +10 -0
  5. opengris_scaler-1.12.37.dist-info/licenses/LICENSE +201 -0
  6. opengris_scaler-1.12.37.dist-info/licenses/LICENSE.spdx +7 -0
  7. opengris_scaler-1.12.37.dist-info/licenses/NOTICE +8 -0
  8. opengris_scaler.libs/libcapnp-1-e88d5415.0.1.so +0 -0
  9. opengris_scaler.libs/libgcc_s-2298274a.so.1 +0 -0
  10. opengris_scaler.libs/libkj-1-9bebd8ac.0.1.so +0 -0
  11. opengris_scaler.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
  12. scaler/__init__.py +14 -0
  13. scaler/about.py +5 -0
  14. scaler/client/__init__.py +0 -0
  15. scaler/client/agent/__init__.py +0 -0
  16. scaler/client/agent/client_agent.py +218 -0
  17. scaler/client/agent/disconnect_manager.py +27 -0
  18. scaler/client/agent/future_manager.py +112 -0
  19. scaler/client/agent/heartbeat_manager.py +74 -0
  20. scaler/client/agent/mixins.py +89 -0
  21. scaler/client/agent/object_manager.py +98 -0
  22. scaler/client/agent/task_manager.py +64 -0
  23. scaler/client/client.py +672 -0
  24. scaler/client/future.py +252 -0
  25. scaler/client/object_buffer.py +129 -0
  26. scaler/client/object_reference.py +25 -0
  27. scaler/client/serializer/__init__.py +0 -0
  28. scaler/client/serializer/default.py +16 -0
  29. scaler/client/serializer/mixins.py +38 -0
  30. scaler/cluster/__init__.py +0 -0
  31. scaler/cluster/cluster.py +95 -0
  32. scaler/cluster/combo.py +157 -0
  33. scaler/cluster/object_storage_server.py +45 -0
  34. scaler/cluster/scheduler.py +86 -0
  35. scaler/config/__init__.py +0 -0
  36. scaler/config/common/__init__.py +0 -0
  37. scaler/config/common/logging.py +41 -0
  38. scaler/config/common/web.py +18 -0
  39. scaler/config/common/worker.py +65 -0
  40. scaler/config/common/worker_adapter.py +28 -0
  41. scaler/config/config_class.py +317 -0
  42. scaler/config/defaults.py +94 -0
  43. scaler/config/mixins.py +20 -0
  44. scaler/config/section/__init__.py +0 -0
  45. scaler/config/section/cluster.py +66 -0
  46. scaler/config/section/ecs_worker_adapter.py +78 -0
  47. scaler/config/section/native_worker_adapter.py +30 -0
  48. scaler/config/section/object_storage_server.py +13 -0
  49. scaler/config/section/scheduler.py +126 -0
  50. scaler/config/section/symphony_worker_adapter.py +35 -0
  51. scaler/config/section/top.py +16 -0
  52. scaler/config/section/webui.py +16 -0
  53. scaler/config/types/__init__.py +0 -0
  54. scaler/config/types/network_backend.py +12 -0
  55. scaler/config/types/object_storage_server.py +45 -0
  56. scaler/config/types/worker.py +67 -0
  57. scaler/config/types/zmq.py +83 -0
  58. scaler/entry_points/__init__.py +0 -0
  59. scaler/entry_points/cluster.py +10 -0
  60. scaler/entry_points/object_storage_server.py +26 -0
  61. scaler/entry_points/scheduler.py +51 -0
  62. scaler/entry_points/top.py +272 -0
  63. scaler/entry_points/webui.py +6 -0
  64. scaler/entry_points/worker_adapter_ecs.py +22 -0
  65. scaler/entry_points/worker_adapter_native.py +31 -0
  66. scaler/entry_points/worker_adapter_symphony.py +26 -0
  67. scaler/io/__init__.py +0 -0
  68. scaler/io/async_binder.py +89 -0
  69. scaler/io/async_connector.py +95 -0
  70. scaler/io/async_object_storage_connector.py +225 -0
  71. scaler/io/mixins.py +154 -0
  72. scaler/io/sync_connector.py +68 -0
  73. scaler/io/sync_object_storage_connector.py +249 -0
  74. scaler/io/sync_subscriber.py +83 -0
  75. scaler/io/utility.py +80 -0
  76. scaler/io/ymq/__init__.py +0 -0
  77. scaler/io/ymq/_ymq.pyi +95 -0
  78. scaler/io/ymq/_ymq.so +0 -0
  79. scaler/io/ymq/ymq.py +138 -0
  80. scaler/io/ymq_async_object_storage_connector.py +184 -0
  81. scaler/io/ymq_sync_object_storage_connector.py +184 -0
  82. scaler/object_storage/__init__.py +0 -0
  83. scaler/object_storage/object_storage_server.so +0 -0
  84. scaler/protocol/__init__.py +0 -0
  85. scaler/protocol/capnp/__init__.py +0 -0
  86. scaler/protocol/capnp/_python.py +6 -0
  87. scaler/protocol/capnp/common.capnp +68 -0
  88. scaler/protocol/capnp/message.capnp +218 -0
  89. scaler/protocol/capnp/object_storage.capnp +57 -0
  90. scaler/protocol/capnp/status.capnp +73 -0
  91. scaler/protocol/introduction.md +105 -0
  92. scaler/protocol/python/__init__.py +0 -0
  93. scaler/protocol/python/common.py +140 -0
  94. scaler/protocol/python/message.py +751 -0
  95. scaler/protocol/python/mixins.py +13 -0
  96. scaler/protocol/python/object_storage.py +118 -0
  97. scaler/protocol/python/status.py +279 -0
  98. scaler/protocol/worker.md +228 -0
  99. scaler/scheduler/__init__.py +0 -0
  100. scaler/scheduler/allocate_policy/__init__.py +0 -0
  101. scaler/scheduler/allocate_policy/allocate_policy.py +9 -0
  102. scaler/scheduler/allocate_policy/capability_allocate_policy.py +280 -0
  103. scaler/scheduler/allocate_policy/even_load_allocate_policy.py +159 -0
  104. scaler/scheduler/allocate_policy/mixins.py +55 -0
  105. scaler/scheduler/controllers/__init__.py +0 -0
  106. scaler/scheduler/controllers/balance_controller.py +65 -0
  107. scaler/scheduler/controllers/client_controller.py +131 -0
  108. scaler/scheduler/controllers/config_controller.py +31 -0
  109. scaler/scheduler/controllers/graph_controller.py +424 -0
  110. scaler/scheduler/controllers/information_controller.py +81 -0
  111. scaler/scheduler/controllers/mixins.py +194 -0
  112. scaler/scheduler/controllers/object_controller.py +147 -0
  113. scaler/scheduler/controllers/scaling_policies/__init__.py +0 -0
  114. scaler/scheduler/controllers/scaling_policies/fixed_elastic.py +145 -0
  115. scaler/scheduler/controllers/scaling_policies/mixins.py +10 -0
  116. scaler/scheduler/controllers/scaling_policies/null.py +14 -0
  117. scaler/scheduler/controllers/scaling_policies/types.py +9 -0
  118. scaler/scheduler/controllers/scaling_policies/utility.py +20 -0
  119. scaler/scheduler/controllers/scaling_policies/vanilla.py +95 -0
  120. scaler/scheduler/controllers/task_controller.py +376 -0
  121. scaler/scheduler/controllers/worker_controller.py +169 -0
  122. scaler/scheduler/object_usage/__init__.py +0 -0
  123. scaler/scheduler/object_usage/object_tracker.py +131 -0
  124. scaler/scheduler/scheduler.py +251 -0
  125. scaler/scheduler/task/__init__.py +0 -0
  126. scaler/scheduler/task/task_state_machine.py +92 -0
  127. scaler/scheduler/task/task_state_manager.py +61 -0
  128. scaler/ui/__init__.py +0 -0
  129. scaler/ui/common/__init__.py +0 -0
  130. scaler/ui/common/constants.py +9 -0
  131. scaler/ui/common/live_display.py +147 -0
  132. scaler/ui/common/memory_window.py +146 -0
  133. scaler/ui/common/setting_page.py +40 -0
  134. scaler/ui/common/task_graph.py +840 -0
  135. scaler/ui/common/task_log.py +111 -0
  136. scaler/ui/common/utility.py +66 -0
  137. scaler/ui/common/webui.py +80 -0
  138. scaler/ui/common/worker_processors.py +104 -0
  139. scaler/ui/v1.py +76 -0
  140. scaler/ui/v2.py +102 -0
  141. scaler/ui/webui.py +21 -0
  142. scaler/utility/__init__.py +0 -0
  143. scaler/utility/debug.py +19 -0
  144. scaler/utility/event_list.py +63 -0
  145. scaler/utility/event_loop.py +58 -0
  146. scaler/utility/exceptions.py +42 -0
  147. scaler/utility/formatter.py +44 -0
  148. scaler/utility/graph/__init__.py +0 -0
  149. scaler/utility/graph/optimization.py +27 -0
  150. scaler/utility/graph/topological_sorter.py +11 -0
  151. scaler/utility/graph/topological_sorter_graphblas.py +174 -0
  152. scaler/utility/identifiers.py +107 -0
  153. scaler/utility/logging/__init__.py +0 -0
  154. scaler/utility/logging/decorators.py +25 -0
  155. scaler/utility/logging/scoped_logger.py +33 -0
  156. scaler/utility/logging/utility.py +183 -0
  157. scaler/utility/many_to_many_dict.py +123 -0
  158. scaler/utility/metadata/__init__.py +0 -0
  159. scaler/utility/metadata/profile_result.py +31 -0
  160. scaler/utility/metadata/task_flags.py +30 -0
  161. scaler/utility/mixins.py +13 -0
  162. scaler/utility/network_util.py +7 -0
  163. scaler/utility/one_to_many_dict.py +72 -0
  164. scaler/utility/queues/__init__.py +0 -0
  165. scaler/utility/queues/async_indexed_queue.py +37 -0
  166. scaler/utility/queues/async_priority_queue.py +70 -0
  167. scaler/utility/queues/async_sorted_priority_queue.py +45 -0
  168. scaler/utility/queues/indexed_queue.py +114 -0
  169. scaler/utility/serialization.py +9 -0
  170. scaler/version.txt +1 -0
  171. scaler/worker/__init__.py +0 -0
  172. scaler/worker/agent/__init__.py +0 -0
  173. scaler/worker/agent/heartbeat_manager.py +110 -0
  174. scaler/worker/agent/mixins.py +137 -0
  175. scaler/worker/agent/processor/__init__.py +0 -0
  176. scaler/worker/agent/processor/object_cache.py +107 -0
  177. scaler/worker/agent/processor/processor.py +285 -0
  178. scaler/worker/agent/processor/streaming_buffer.py +28 -0
  179. scaler/worker/agent/processor_holder.py +147 -0
  180. scaler/worker/agent/processor_manager.py +369 -0
  181. scaler/worker/agent/profiling_manager.py +109 -0
  182. scaler/worker/agent/task_manager.py +150 -0
  183. scaler/worker/agent/timeout_manager.py +19 -0
  184. scaler/worker/preload.py +84 -0
  185. scaler/worker/worker.py +265 -0
  186. scaler/worker_adapter/__init__.py +0 -0
  187. scaler/worker_adapter/common.py +26 -0
  188. scaler/worker_adapter/ecs.py +241 -0
  189. scaler/worker_adapter/native.py +138 -0
  190. scaler/worker_adapter/symphony/__init__.py +0 -0
  191. scaler/worker_adapter/symphony/callback.py +45 -0
  192. scaler/worker_adapter/symphony/heartbeat_manager.py +82 -0
  193. scaler/worker_adapter/symphony/message.py +24 -0
  194. scaler/worker_adapter/symphony/task_manager.py +289 -0
  195. scaler/worker_adapter/symphony/worker.py +204 -0
  196. scaler/worker_adapter/symphony/worker_adapter.py +123 -0
@@ -0,0 +1,157 @@
1
+ import logging
2
+ import socket
3
+ from typing import Dict, Optional, Tuple
4
+
5
+ from scaler.cluster.cluster import Cluster
6
+ from scaler.cluster.object_storage_server import ObjectStorageServerProcess
7
+ from scaler.cluster.scheduler import SchedulerProcess
8
+ from scaler.config.common.logging import LoggingConfig
9
+ from scaler.config.common.worker import WorkerConfig
10
+ from scaler.config.defaults import (
11
+ DEFAULT_CLIENT_TIMEOUT_SECONDS,
12
+ DEFAULT_GARBAGE_COLLECT_INTERVAL_SECONDS,
13
+ DEFAULT_HARD_PROCESSOR_SUSPEND,
14
+ DEFAULT_HEARTBEAT_INTERVAL_SECONDS,
15
+ DEFAULT_IO_THREADS,
16
+ DEFAULT_LOAD_BALANCE_SECONDS,
17
+ DEFAULT_LOAD_BALANCE_TRIGGER_TIMES,
18
+ DEFAULT_LOGGING_LEVEL,
19
+ DEFAULT_LOGGING_PATHS,
20
+ DEFAULT_MAX_NUMBER_OF_TASKS_WAITING,
21
+ DEFAULT_OBJECT_RETENTION_SECONDS,
22
+ DEFAULT_PER_WORKER_QUEUE_SIZE,
23
+ DEFAULT_TASK_TIMEOUT_SECONDS,
24
+ DEFAULT_TRIM_MEMORY_THRESHOLD_BYTES,
25
+ DEFAULT_WORKER_DEATH_TIMEOUT,
26
+ DEFAULT_WORKER_TIMEOUT_SECONDS,
27
+ )
28
+ from scaler.config.section.cluster import ClusterConfig
29
+ from scaler.config.types.object_storage_server import ObjectStorageAddressConfig
30
+ from scaler.config.types.worker import WorkerCapabilities, WorkerNames
31
+ from scaler.config.types.zmq import ZMQConfig
32
+ from scaler.scheduler.allocate_policy.allocate_policy import AllocatePolicy
33
+ from scaler.scheduler.controllers.scaling_policies.types import ScalingControllerStrategy
34
+ from scaler.utility.network_util import get_available_tcp_port
35
+
36
+
37
class SchedulerClusterCombo:
    """Run an object storage server, a worker cluster and a scheduler together.

    The constructor builds and starts all three components; ``shutdown()``
    terminates and joins them again. ``__del__`` also calls ``shutdown()``.
    """

    def __init__(
        self,
        n_workers: int,
        address: Optional[str] = None,
        object_storage_address: Optional[str] = None,
        monitor_address: Optional[str] = None,
        per_worker_capabilities: Optional[Dict[str, int]] = None,
        worker_io_threads: int = DEFAULT_IO_THREADS,
        scheduler_io_threads: int = DEFAULT_IO_THREADS,
        max_number_of_tasks_waiting: int = DEFAULT_MAX_NUMBER_OF_TASKS_WAITING,
        heartbeat_interval_seconds: int = DEFAULT_HEARTBEAT_INTERVAL_SECONDS,
        client_timeout_seconds: int = DEFAULT_CLIENT_TIMEOUT_SECONDS,
        worker_timeout_seconds: int = DEFAULT_WORKER_TIMEOUT_SECONDS,
        object_retention_seconds: int = DEFAULT_OBJECT_RETENTION_SECONDS,
        task_timeout_seconds: int = DEFAULT_TASK_TIMEOUT_SECONDS,
        death_timeout_seconds: int = DEFAULT_WORKER_DEATH_TIMEOUT,
        load_balance_seconds: int = DEFAULT_LOAD_BALANCE_SECONDS,
        load_balance_trigger_times: int = DEFAULT_LOAD_BALANCE_TRIGGER_TIMES,
        garbage_collect_interval_seconds: int = DEFAULT_GARBAGE_COLLECT_INTERVAL_SECONDS,
        trim_memory_threshold_bytes: int = DEFAULT_TRIM_MEMORY_THRESHOLD_BYTES,
        per_worker_task_queue_size: int = DEFAULT_PER_WORKER_QUEUE_SIZE,
        hard_processor_suspend: bool = DEFAULT_HARD_PROCESSOR_SUSPEND,
        protected: bool = True,
        allocate_policy: AllocatePolicy = AllocatePolicy.even,
        event_loop: str = "builtin",
        logging_paths: Tuple[str, ...] = DEFAULT_LOGGING_PATHS,
        logging_level: str = DEFAULT_LOGGING_LEVEL,
        logging_config_file: Optional[str] = None,
    ):
        """Create and start the object storage server, the cluster and the scheduler.

        :param n_workers: number of workers handed to the cluster configuration.
        :param address: scheduler address string; when ``None`` a
            ``tcp://127.0.0.1:<port>`` address with an available port is used.
        :param object_storage_address: object storage address string; when ``None``
            the scheduler host is reused together with another available port.
        :param monitor_address: optional monitor address string, passed to the scheduler.
        :param per_worker_capabilities: capabilities given to every worker; ``None``
            is treated as an empty mapping.

        The remaining parameters are forwarded unchanged to ``WorkerConfig``,
        ``ClusterConfig``, ``LoggingConfig`` or ``SchedulerProcess``.
        """
        if address is None:
            self._address = ZMQConfig.from_string(f"tcp://127.0.0.1:{get_available_tcp_port()}")
        else:
            self._address = ZMQConfig.from_string(address)

        if object_storage_address is None:
            self._object_storage_address = ObjectStorageAddressConfig(self._address.host, get_available_tcp_port())
        else:
            self._object_storage_address = ObjectStorageAddressConfig.from_string(object_storage_address)

        if monitor_address is None:
            self._monitor_address = None
        else:
            self._monitor_address = ZMQConfig.from_string(monitor_address)

        self._object_storage = ObjectStorageServerProcess(
            object_storage_address=self._object_storage_address,
            logging_paths=logging_paths,
            logging_level=logging_level,
            logging_config_file=logging_config_file,
        )
        self._object_storage.start()
        self._object_storage.wait_until_ready()  # object storage should be ready before starting the cluster

        # every worker is named after the short host name; resolve it once instead of per worker
        short_hostname = socket.gethostname().split(".")[0]

        self._cluster = Cluster(
            config=ClusterConfig(
                scheduler_address=self._address,
                object_storage_address=self._object_storage_address,
                preload=None,
                worker_names=WorkerNames([short_hostname for _ in range(n_workers)]),
                num_of_workers=n_workers,
                event_loop=event_loop,
                worker_io_threads=worker_io_threads,
                worker_config=WorkerConfig(
                    per_worker_capabilities=WorkerCapabilities(per_worker_capabilities or {}),
                    per_worker_task_queue_size=per_worker_task_queue_size,
                    heartbeat_interval_seconds=heartbeat_interval_seconds,
                    task_timeout_seconds=task_timeout_seconds,
                    death_timeout_seconds=death_timeout_seconds,
                    garbage_collect_interval_seconds=garbage_collect_interval_seconds,
                    trim_memory_threshold_bytes=trim_memory_threshold_bytes,
                    hard_processor_suspend=hard_processor_suspend,
                ),
                logging_config=LoggingConfig(paths=logging_paths, config_file=logging_config_file, level=logging_level),
            )
        )

        self._scheduler = SchedulerProcess(
            address=self._address,
            object_storage_address=self._object_storage_address,
            monitor_address=self._monitor_address,
            io_threads=scheduler_io_threads,
            max_number_of_tasks_waiting=max_number_of_tasks_waiting,
            client_timeout_seconds=client_timeout_seconds,
            scaling_controller_strategy=ScalingControllerStrategy.NULL,
            adapter_webhook_urls=(),
            worker_timeout_seconds=worker_timeout_seconds,
            object_retention_seconds=object_retention_seconds,
            load_balance_seconds=load_balance_seconds,
            load_balance_trigger_times=load_balance_trigger_times,
            protected=protected,
            allocate_policy=allocate_policy,
            event_loop=event_loop,
            logging_paths=logging_paths,
            logging_config_file=logging_config_file,
            logging_level=logging_level,
        )

        self._cluster.start()
        self._scheduler.start()
        logging.info(f"{self.__get_prefix()} started")

    def __del__(self):
        self.shutdown()

    def shutdown(self):
        """Terminate and join the cluster and scheduler, then the object storage server.

        Components that were never assigned are skipped. This matters because
        ``__del__`` calls this method even when ``__init__`` raised part-way
        through; without the guards the ``AttributeError`` would leave an already
        started object storage server running.
        """
        logging.info(f"{self.__get_prefix()} shutdown")

        cluster = getattr(self, "_cluster", None)
        scheduler = getattr(self, "_scheduler", None)
        object_storage = getattr(self, "_object_storage", None)

        try:
            # signal both before waiting on either, as the original ordering did
            if cluster is not None:
                cluster.terminate()
            if scheduler is not None:
                scheduler.terminate()
            if cluster is not None:
                cluster.join()
            if scheduler is not None:
                scheduler.join()
        finally:
            # object storage should terminate after the cluster and scheduler,
            # and must still be stopped if tearing those down raised.
            if object_storage is not None:
                object_storage.terminate()
                object_storage.join()

    def get_address(self) -> str:
        """Return the scheduler address as produced by ``ZMQConfig.to_address()``."""
        return self._address.to_address()

    def __get_prefix(self):
        """Return the ``"<ClassName>:"`` prefix used in this class's log messages."""
        return f"{self.__class__.__name__}:"
@@ -0,0 +1,45 @@
1
+ import logging
2
+ import multiprocessing
3
+ from typing import Optional, Tuple
4
+
5
+ from scaler.config.types.object_storage_server import ObjectStorageAddressConfig
6
+ from scaler.object_storage.object_storage_server import ObjectStorageServer
7
+ from scaler.utility.logging.utility import get_logger_info, setup_logger
8
+
9
+
10
class ObjectStorageServerProcess(multiprocessing.get_context("fork").Process):  # type: ignore[misc]
    """Process (created from the ``fork`` context) that runs an ``ObjectStorageServer``."""

    def __init__(
        self,
        object_storage_address: ObjectStorageAddressConfig,
        logging_paths: Tuple[str, ...],
        logging_level: str,
        logging_config_file: Optional[str],
    ):
        """Store the address and logging settings and create the server object.

        The ``ObjectStorageServer`` instance is created here, before the process
        is started, and is the object ``wait_until_ready()`` delegates to.
        """
        super().__init__(name="ObjectStorageServer")

        self._object_storage_address = object_storage_address

        self._logging_paths = logging_paths
        self._logging_level = logging_level
        self._logging_config_file = logging_config_file

        self._server = ObjectStorageServer()

    def wait_until_ready(self) -> None:
        """Blocks until the object storage server is available to serve requests."""
        self._server.wait_until_ready()

    def run(self) -> None:
        """Process entry point: configure logging, then run the server on the configured address."""
        setup_logger(self._logging_paths, self._logging_config_file, self._logging_level)

        address = self._object_storage_address
        logging.info(f"ObjectStorageServer: start and listen to {address.to_string()}")

        # hand the root logger's effective settings over to the server
        logger_info = get_logger_info(logging.getLogger())
        log_format_str, log_level_str, logging_paths = logger_info

        self._server.run(address.host, address.port, address.identity, log_level_str, log_format_str, logging_paths)
@@ -0,0 +1,86 @@
1
+ import asyncio
2
+ import multiprocessing
3
+ import signal
4
+ from asyncio import AbstractEventLoop, Task
5
+ from typing import Any, Optional, Tuple
6
+
7
+ from scaler.config.section.scheduler import SchedulerConfig
8
+ from scaler.config.types.object_storage_server import ObjectStorageAddressConfig
9
+ from scaler.config.types.zmq import ZMQConfig
10
+ from scaler.scheduler.allocate_policy.allocate_policy import AllocatePolicy
11
+ from scaler.scheduler.controllers.scaling_policies.types import ScalingControllerStrategy
12
+ from scaler.scheduler.scheduler import Scheduler, scheduler_main
13
+ from scaler.utility.event_loop import register_event_loop
14
+ from scaler.utility.logging.utility import setup_logger
15
+
16
+
17
class SchedulerProcess(multiprocessing.get_context("spawn").Process):  # type: ignore[misc]
    """Process (created from the ``spawn`` context) that runs ``scheduler_main`` on an asyncio loop."""

    def __init__(
        self,
        address: ZMQConfig,
        object_storage_address: Optional[ObjectStorageAddressConfig],
        monitor_address: Optional[ZMQConfig],
        scaling_controller_strategy: ScalingControllerStrategy,
        adapter_webhook_urls: Tuple[str, ...],
        io_threads: int,
        max_number_of_tasks_waiting: int,
        client_timeout_seconds: int,
        worker_timeout_seconds: int,
        object_retention_seconds: int,
        load_balance_seconds: int,
        load_balance_trigger_times: int,
        protected: bool,
        allocate_policy: AllocatePolicy,
        event_loop: str,
        logging_paths: Tuple[str, ...],
        logging_config_file: Optional[str],
        logging_level: str,
    ):
        """Bundle the scheduler options into a ``SchedulerConfig`` and remember the logging settings."""
        super().__init__(name="Scheduler")

        # runtime state, only populated inside run()
        self._scheduler: Optional[Scheduler] = None
        self._loop: Optional[AbstractEventLoop] = None
        self._task: Optional[Task[Any]] = None

        self._logging_paths = logging_paths
        self._logging_config_file = logging_config_file
        self._logging_level = logging_level

        self._scheduler_config = SchedulerConfig(
            scheduler_address=address,
            object_storage_address=object_storage_address,
            monitor_address=monitor_address,
            scaling_controller_strategy=scaling_controller_strategy,
            adapter_webhook_urls=adapter_webhook_urls,
            protected=protected,
            allocate_policy=allocate_policy,
            max_number_of_tasks_waiting=max_number_of_tasks_waiting,
            client_timeout_seconds=client_timeout_seconds,
            worker_timeout_seconds=worker_timeout_seconds,
            object_retention_seconds=object_retention_seconds,
            load_balance_seconds=load_balance_seconds,
            load_balance_trigger_times=load_balance_trigger_times,
            event_loop=event_loop,
            worker_io_threads=io_threads,
        )

    def run(self) -> None:
        """Process entry point: set up logging and the event loop, then run the scheduler task to completion."""
        # the scheduler lives in its own dedicated process
        setup_logger(self._logging_paths, self._logging_config_file, self._logging_level)
        register_event_loop(self._scheduler_config.event_loop)

        loop = asyncio.get_event_loop()
        self._loop = loop
        SchedulerProcess.__register_signal(loop)

        scheduler_task = loop.create_task(scheduler_main(self._scheduler_config))
        self._task = scheduler_task

        loop.run_until_complete(scheduler_task)

    @staticmethod
    def __register_signal(loop):
        """Install the cancel-all handler for SIGINT and SIGTERM on ``loop``."""
        for signum in (signal.SIGINT, signal.SIGTERM):
            loop.add_signal_handler(signum, SchedulerProcess.__handle_signal)

    @staticmethod
    def __handle_signal():
        """Cancel every task returned by ``asyncio.all_tasks()``."""
        for pending in asyncio.all_tasks():
            pending.cancel()
File without changes
File without changes
@@ -0,0 +1,41 @@
1
+ import dataclasses
2
+ from typing import Optional, Tuple
3
+
4
+ from scaler.config import defaults
5
+ from scaler.config.config_class import ConfigClass
6
+ from scaler.utility.logging.utility import LoggingLevel
7
+
8
+
9
@dataclasses.dataclass
class LoggingConfig(ConfigClass):
    """Logging options: output paths, optional logging config file, and level.

    Each field's ``metadata`` carries the command-line flags and help text for
    that option.
    """

    # destinations for log output; defaults to defaults.DEFAULT_LOGGING_PATHS
    paths: Tuple[str, ...] = dataclasses.field(
        default=defaults.DEFAULT_LOGGING_PATHS,
        metadata=dict(
            type=str,
            long="--logging-paths",
            short="-lp",
            nargs="*",
            # every fragment but the last ends with a space so the joined sentences stay separated
            help="specify where the cluster's log should be logged to, there can be multiple paths. "
            '"/dev/stdout" means output to stdout, and is the default. '
            "each worker has its own log file with process id appended to the path",
        ),
    )
    # optional path to a separate logging configuration file
    config_file: Optional[str] = dataclasses.field(
        default=None,
        metadata=dict(
            long="--logging-config-file",
            short="-lcf",
            help="provide a separate config file for logging, in python's standard .conf format. "
            "this will supersede configuration passed to other --logging-* parameters, "
            "and also does not support per-worker logging",
        ),
    )
    # logging level name; the offered choices are all LoggingLevel members except NOTSET
    level: str = dataclasses.field(
        default=defaults.DEFAULT_LOGGING_LEVEL,
        metadata=dict(
            long="--logging-level",
            short="-ll",
            choices=[member.name for member in LoggingLevel if member is not LoggingLevel.NOTSET],
            help="set the logging level",
        ),
    )
@@ -0,0 +1,18 @@
1
+ import dataclasses
2
+ from typing import Optional
3
+
4
+ from scaler.config.config_class import ConfigClass
5
+
6
+
7
@dataclasses.dataclass
class WebConfig(ConfigClass):
    """Host and port options for the worker adapter HTTP server."""

    # both fields default to None in the dataclass but are flagged required=True in their metadata
    adapter_web_host: Optional[str] = dataclasses.field(
        default=None, metadata=dict(required=True, help="host address for the worker adapter HTTP server")
    )
    adapter_web_port: Optional[int] = dataclasses.field(
        default=None, metadata=dict(short="-p", required=True, help="port for the worker adapter HTTP server")
    )

    def __post_init__(self) -> None:
        """Validate the port range.

        :raises ValueError: if ``adapter_web_port`` is set and lies outside 1..65535.
        """
        # compare against None explicitly: a truthiness test would let port 0 skip validation
        if self.adapter_web_port is not None and not (1 <= self.adapter_web_port <= 65535):
            raise ValueError(f"adapter_web_port must be between 1 and 65535, but got {self.adapter_web_port}")
@@ -0,0 +1,65 @@
1
+ import dataclasses
2
+
3
+ from scaler.config import defaults
4
+ from scaler.config.config_class import ConfigClass
5
+ from scaler.config.types.worker import WorkerCapabilities
6
+
7
+
8
@dataclasses.dataclass
class WorkerConfig(ConfigClass):
    """Per-worker options: capabilities, queue size, timing intervals and memory trimming.

    Each field's ``metadata`` carries the short command-line flag and help text
    for that option; values are validated in ``__post_init__``.
    """

    per_worker_capabilities: WorkerCapabilities = dataclasses.field(
        default_factory=WorkerCapabilities,
        metadata={
            "short": "-pwc",
            "help": 'a comma-separated list of capabilities provided by the workers (e.g. "linux,cpu=4")',
        },
    )
    per_worker_task_queue_size: int = dataclasses.field(
        default=defaults.DEFAULT_PER_WORKER_QUEUE_SIZE,
        metadata={"short": "-wtqs", "help": "set the per worker queue size"},
    )
    heartbeat_interval_seconds: int = dataclasses.field(
        default=defaults.DEFAULT_HEARTBEAT_INTERVAL_SECONDS,
        metadata={"short": "-his", "help": "the interval at which to send heartbeats in seconds"},
    )
    task_timeout_seconds: int = dataclasses.field(
        default=defaults.DEFAULT_TASK_TIMEOUT_SECONDS,
        metadata={
            "short": "-tts",
            "help": "the number of seconds before a task is considered timed out and an error is raised",
        },
    )
    death_timeout_seconds: int = dataclasses.field(
        default=defaults.DEFAULT_WORKER_DEATH_TIMEOUT,
        metadata={"short": "-dts", "help": "death timeout seconds"},
    )
    garbage_collect_interval_seconds: int = dataclasses.field(
        default=defaults.DEFAULT_GARBAGE_COLLECT_INTERVAL_SECONDS,
        metadata={"short": "-gc", "help": "the interval at which the garbage collector is run in seconds"},
    )
    trim_memory_threshold_bytes: int = dataclasses.field(
        default=defaults.DEFAULT_TRIM_MEMORY_THRESHOLD_BYTES,
        metadata={"short": "-tm", "help": "set the threshold for trimming libc's memory"},
    )
    hard_processor_suspend: bool = dataclasses.field(
        default=defaults.DEFAULT_HARD_PROCESSOR_SUSPEND,
        metadata={
            "short": "-hps",
            "action": "store_true",
            "help": (
                "when set, suspends worker processors using the SIGTSTP signal instead of a synchronization event, "
                "fully halting computation on suspended tasks. this may cause some tasks to fail if they "
                "do not support being paused at the OS level (e.g. tasks requiring active network connections)"
            ),
        },
    )

    def __post_init__(self) -> None:
        """Validate the numeric options.

        :raises ValueError: if the queue size is not positive, if any interval or
            timeout is out of range, or if the trim threshold is negative.
        """
        if self.per_worker_task_queue_size <= 0:
            raise ValueError("per_worker_task_queue_size must be positive.")

        # task_timeout_seconds is the only timing value for which zero is accepted
        timing_out_of_range = (
            self.heartbeat_interval_seconds <= 0
            or self.task_timeout_seconds < 0
            or self.death_timeout_seconds <= 0
            or self.garbage_collect_interval_seconds <= 0
        )
        if timing_out_of_range:
            raise ValueError("All interval/timeout second values must be positive.")

        if self.trim_memory_threshold_bytes < 0:
            raise ValueError("trim_memory_threshold_bytes cannot be negative.")
@@ -0,0 +1,28 @@
1
+ import dataclasses
2
+ from typing import Optional
3
+
4
+ from scaler.config import defaults
5
+ from scaler.config.config_class import ConfigClass
6
+ from scaler.config.types.object_storage_server import ObjectStorageAddressConfig
7
+ from scaler.config.types.zmq import ZMQConfig
8
+
9
+
10
@dataclasses.dataclass
class WorkerAdapterConfig(ConfigClass):
    """Options shared by worker adapters: scheduler address, object storage address, worker limit."""

    # positional option: the scheduler the workers connect to
    scheduler_address: ZMQConfig = dataclasses.field(
        metadata={"positional": True, "help": "scheduler address to connect workers to"}
    )

    object_storage_address: Optional[ObjectStorageAddressConfig] = dataclasses.field(
        default=None,
        metadata={
            "short": "-osa",
            "help": "specify the object storage server address, e.g.: tcp://localhost:2346",
        },
    )

    max_workers: int = dataclasses.field(
        default=defaults.DEFAULT_NUMBER_OF_WORKER,
        metadata={
            "short": "-mw",
            "help": "maximum number of workers that can be started, -1 means no limit",
        },
    )

    def __post_init__(self) -> None:
        """Validate ``max_workers``.

        :raises ValueError: if ``max_workers`` is neither ``-1`` nor a positive integer.
        """
        is_unlimited = self.max_workers == -1
        if not is_unlimited and self.max_workers <= 0:
            raise ValueError("max_workers must be -1 (no limit) or a positive integer.")