ray-embedding 0.13.2__py3-none-any.whl → 0.13.4__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ray-embedding might be problematic. Click here for more details.
- ray_embedding/node_health.py +7 -4
- {ray_embedding-0.13.2.dist-info → ray_embedding-0.13.4.dist-info}/METADATA +1 -1
- {ray_embedding-0.13.2.dist-info → ray_embedding-0.13.4.dist-info}/RECORD +5 -5
- {ray_embedding-0.13.2.dist-info → ray_embedding-0.13.4.dist-info}/WHEEL +0 -0
- {ray_embedding-0.13.2.dist-info → ray_embedding-0.13.4.dist-info}/top_level.txt +0 -0
ray_embedding/node_health.py
CHANGED
|
@@ -8,7 +8,7 @@ from ray._private.services import get_node_ip_address
|
|
|
8
8
|
from ray.util.state import list_actors
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
@serve.deployment(autoscaling_config=dict(min_replicas=
|
|
11
|
+
@serve.deployment(autoscaling_config=dict(min_replicas=0, max_replicas=1),
|
|
12
12
|
ray_actor_options=dict(num_cpus=0.1))
|
|
13
13
|
class NodeHealthTracker:
|
|
14
14
|
"""Maintains a list of bad nodes, as reported by replicas that call the report_bad_node func.
|
|
@@ -41,12 +41,15 @@ class NodeHealthTracker:
|
|
|
41
41
|
async def is_bad_gpu_node(self, node_ip: str) -> bool:
|
|
42
42
|
self.logger.info(f"Checking if node {node_ip} is marked bad.")
|
|
43
43
|
with self.lock:
|
|
44
|
-
|
|
44
|
+
is_bad_gpu_node = node_ip in self.bad_gpu_node_ips
|
|
45
|
+
self.logger.info(f"Node {node_ip} is marked bad: {is_bad_gpu_node}")
|
|
46
|
+
return is_bad_gpu_node
|
|
45
47
|
|
|
46
48
|
async def is_bad_gpu_or_no_model_replica_on_node(self, node_ip: str):
|
|
47
49
|
self.logger.info(f"Checking if node {node_ip} is marked bad or no model replica running on the node.")
|
|
48
|
-
|
|
49
|
-
|
|
50
|
+
is_bad_gpu_node = await self.is_bad_gpu_node(node_ip)
|
|
51
|
+
is_no_model_replica_running_on_node = not await self.is_model_replica_running_on_node(node_ip)
|
|
52
|
+
return is_bad_gpu_node or is_no_model_replica_running_on_node
|
|
50
53
|
|
|
51
54
|
async def check_health(self):
|
|
52
55
|
"""Called periodically by Ray Serve. Used here to clean up stale node IDs."""
|
|
@@ -3,8 +3,8 @@ ray_embedding/deploy.py,sha256=2R7bQ7aPc9G8H9KVoemxum6-9YxmlXQogWbhFhuslko,3762
|
|
|
3
3
|
ray_embedding/dto.py,sha256=lk_LuVQPq3MLIMTMddqHviYXILY6V5dvbzDJuD_D_qc,1573
|
|
4
4
|
ray_embedding/embedding_model.py,sha256=P2xyXCznxXmdQBK6zodOJEMvxGVRMA8Ra3O5Qi7RCh0,6013
|
|
5
5
|
ray_embedding/model_router.py,sha256=fmaeXzaAJeCemzL9nUoXfdCrU-ZaCe_29fx5ayDCTC0,6845
|
|
6
|
-
ray_embedding/node_health.py,sha256=
|
|
7
|
-
ray_embedding-0.13.
|
|
8
|
-
ray_embedding-0.13.
|
|
9
|
-
ray_embedding-0.13.
|
|
10
|
-
ray_embedding-0.13.
|
|
6
|
+
ray_embedding/node_health.py,sha256=bsoIpn95adunS3gFouXsPjOYV_zuS1WufxBEEjCWX94,4973
|
|
7
|
+
ray_embedding-0.13.4.dist-info/METADATA,sha256=FLXxkhuH3PohNABcB1WdHE6qPyIpizf_X_6BQ1SPb4E,1074
|
|
8
|
+
ray_embedding-0.13.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
9
|
+
ray_embedding-0.13.4.dist-info/top_level.txt,sha256=ziCblpJq1YsrryshFqxTRuRMgNuO1_tgvAAkGShATNA,14
|
|
10
|
+
ray_embedding-0.13.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|