matrice-compute 0.1.24.tar.gz → 0.1.25.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/PKG-INFO +1 -1
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/PKG-INFO +1 -1
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/action_instance.py +105 -8
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/instance_utils.py +114 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/LICENSE.txt +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/README.md +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/SOURCES.txt +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/dependency_links.txt +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/not-zip-safe +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/top_level.txt +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/pyproject.toml +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/setup.cfg +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/setup.py +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/__init__.py +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/actions_manager.py +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/actions_scaledown_manager.py +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/instance_manager.py +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/prechecks.py +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/py.typed +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/resources_tracker.py +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/scaling.py +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/shutdown_manager.py +0 -0
- {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/task_utils.py +0 -0
{matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/action_instance.py
CHANGED

@@ -10,6 +10,7 @@ import signal
 import urllib.request
 from matrice_compute.instance_utils import (
     get_gpu_with_sufficient_memory_for_action,
+    get_gpu_config_for_deployment,
     get_decrypted_access_key_pair,
     get_max_file_system,
     get_best_service_ip_and_network,
@@ -26,6 +27,10 @@ from matrice_common.utils import log_errors
 class ActionInstance:
     """Base class for tasks that run in Action containers."""
 
+    # Class-level dictionary to track deployed services and their ports
+    # Key: _idService, Value: {"triton_ports": "port1,port2,port3", "is_first": False}
+    _deployed_services = {}
+
     def __init__(self, scaling: Scaling, action_info: dict):
         """Initialize an action instance.
 
@@ -84,6 +89,67 @@ class ActionInstance:
             raise ValueError(f"Unknown action type: {self.action_type}")
         self.task = self.actions_map[self.action_type]
 
+    @classmethod
+    def is_first_deployment_for_service(cls, service_id):
+        """Check if this is the first deployment for a given service.
+
+        Args:
+            service_id (str): Service ID (_idService)
+
+        Returns:
+            bool: True if this is the first deployment, False otherwise
+        """
+        if not service_id:
+            return False
+        return service_id not in cls._deployed_services
+
+    @classmethod
+    def get_or_create_triton_ports(cls, service_id, scaling_instance):
+        """Get existing TRITON_PORTS for a service or create new ones.
+
+        Args:
+            service_id (str): Service ID (_idService)
+            scaling_instance: Scaling instance to get open ports
+
+        Returns:
+            str: Comma-separated string of 3 port numbers (e.g., "8001,8002,8003")
+        """
+        if not service_id:
+            # No service_id, generate new ports
+            port1 = scaling_instance.get_open_port()
+            port2 = scaling_instance.get_open_port()
+            port3 = scaling_instance.get_open_port()
+            return f"{port1},{port2},{port3}"
+
+        # Check if ports already exist for this service
+        if service_id in cls._deployed_services:
+            triton_ports = cls._deployed_services[service_id]["triton_ports"]
+            logging.info(
+                "Reusing TRITON_PORTS for service %s: %s",
+                service_id,
+                triton_ports
+            )
+            return triton_ports
+
+        # First deployment: generate new ports and store them
+        port1 = scaling_instance.get_open_port()
+        port2 = scaling_instance.get_open_port()
+        port3 = scaling_instance.get_open_port()
+        triton_ports = f"{port1},{port2},{port3}"
+
+        # Store for future use
+        cls._deployed_services[service_id] = {
+            "triton_ports": triton_ports,
+            "is_first": False
+        }
+
+        logging.info(
+            "First deployment for service %s - generated TRITON_PORTS: %s",
+            service_id,
+            triton_ports
+        )
+        return triton_ports
+
     @log_errors(default_return={}, raise_exception=True, log_error=False)
     def _init_credentials(self):
         """Initialize Matrice credentials.
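Taken together, the class-level _deployed_services map and these two classmethods give every replica of a service the same TRITON_PORTS triple within one manager process. Below is a minimal sketch of that contract, with a hypothetical StubScaling standing in for the real Scaling object (only get_open_port is modeled) and the import path assumed from this package layout:

# Hypothetical usage sketch - StubScaling stands in for matrice_compute.scaling.Scaling.
import itertools

from matrice_compute.action_instance import ActionInstance  # assumed import path

class StubScaling:
    _ports = itertools.count(8001)  # hand out 8001, 8002, ... as "open" ports

    def get_open_port(self):
        return next(self._ports)

scaling = StubScaling()
assert ActionInstance.is_first_deployment_for_service("svc-1")        # not seen yet
first = ActionInstance.get_or_create_triton_ports("svc-1", scaling)   # e.g. "8001,8002,8003"
again = ActionInstance.get_or_create_triton_ports("svc-1", scaling)   # cached, no new ports
assert first == again
assert not ActionInstance.is_first_deployment_for_service("svc-1")    # now registered

Note the ordering used in model_deploy_execute further down: is_first_deployment_for_service is checked before get_or_create_triton_ports registers the service. Because the registry is an in-memory class attribute, it resets whenever the compute process restarts.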
@@ -1387,10 +1453,27 @@ def redis_setup_execute(self: ActionInstance):
         f"docker run -d --net=host "
         f"--name redis_container_{int(time.time())} "
         f"--restart unless-stopped "
+        f"--memory=32g "
+        f"--cpus=8 "
         f"{redis_image} "
-        f"redis-server --bind 0.0.0.0
+        f"redis-server --bind 0.0.0.0 "
+        f"--appendonly no "
+        f'--save "" '
+        f"--maxmemory 30gb "
+        f"--maxmemory-policy allkeys-lru "
+        f"--io-threads 4 "
+        f"--io-threads-do-reads yes "
+        f"--stream-node-max-bytes 8192 "
+        f"--stream-node-max-entries 1000 "
+        f"--hz 100 "
+        f"--tcp-backlog 2048 "
+        f"--timeout 0 "
+        f"--lazyfree-lazy-eviction yes "
+        f"--lazyfree-lazy-expire yes "
+        f"--lazyfree-lazy-server-del yes "
+        f"--activedefrag yes "
+        f"--requirepass {redis_password}"
     )
-
     logging.info("Starting Redis container on %s:6379: %s", redis_host, redis_cmd)
 
     # Start Redis container first
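The new flags pin the container to 32 GB and 8 CPUs and tune Redis as a pure in-memory cache: persistence is disabled (--appendonly no, --save ""), eviction is LRU with a 30 GB ceiling (leaving roughly 2 GB of container headroom), I/O threads and lazy freeing reduce latency spikes under load, and the stream-node limits keep Redis Streams entries compact. A hypothetical smoke test with the redis-py client (the package and the password value are assumptions, not part of this diff) can confirm the container came up with the intended configuration:

# Hypothetical check - requires `pip install redis` and the generated password.
import redis

r = redis.Redis(host="127.0.0.1", port=6379, password="<redis_password>")
r.ping()                                  # raises if auth or bind is wrong
print(r.config_get("maxmemory-policy"))   # expect {'maxmemory-policy': 'allkeys-lru'}
print(r.config_get("appendonly"))         # expect {'appendonly': 'no'}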
@@ -1455,6 +1538,10 @@ def model_deploy_execute(self: ActionInstance):
         return
     action_id = action_details["_id"]
     model_family = action_details["actionDetails"]["modelFamily"]
+
+    # Get the service ID to track deployments
+    service_id = action_details.get("_idService")
+
     self.setup_action_requirements(
         action_details,
         work_fs,
@@ -1462,17 +1549,27 @@ def model_deploy_execute(self: ActionInstance):
         action_id=action_id,
     )
 
-    #
-
-
+    # Check if this is the first deployment for this service
+    is_first_deployment = ActionInstance.is_first_deployment_for_service(service_id)
+
+    # Get GPU configuration (uses utility function with fail-safe fallback)
+    use_gpu = get_gpu_config_for_deployment(action_details, is_first_deployment)
 
     logging.info(
-        "Action %s: Model deployment GPU config: %s",
+        "Action %s: Model deployment GPU config: %s (first_deployment=%s)",
         action_id,
-        use_gpu if use_gpu else "CPU-only"
+        use_gpu if use_gpu else "CPU-only",
+        is_first_deployment
     )
 
-
+    # Get or create TRITON_PORTS (uses utility method)
+    triton_ports = ActionInstance.get_or_create_triton_ports(service_id, self.scaling)
+
+    extra_env_vars = {
+        "INTERNAL_PORT": internal_port,
+        "TRITON_PORTS": triton_ports
+    }
+
     cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, mount_docker_sock=True, action_id=action_id, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_inference", "matrice_analytics"])} python3 deploy.py {self.action_record_id} {external_port}"'
     logging.info("cmd is: %s", cmd)
     self.start(cmd, "deploy_log")
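get_base_docker_cmd is not part of this diff, so how extra_env_vars reaches the container is not shown here; presumably each key becomes a docker -e flag. A hypothetical sketch of that translation step:

import shlex

def render_env_flags(extra_env_vars):
    # Hypothetical stand-in for the env-var handling inside get_base_docker_cmd.
    return " ".join(f"-e {key}={shlex.quote(str(val))}" for key, val in extra_env_vars.items())

print(render_env_flags({"INTERNAL_PORT": 8080, "TRITON_PORTS": "8001,8002,8003"}))
# -> -e INTERNAL_PORT=8080 -e TRITON_PORTS=8001,8002,8003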
{matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/instance_utils.py
CHANGED

@@ -941,6 +941,120 @@ def get_single_gpu_with_sufficient_memory_for_action(
     raise ValueError(error_msg)
 
 
+@log_errors(default_return="", raise_exception=False)
+def get_gpu_config_for_deployment(action_details, is_first_deployment=False):
+    """Get GPU configuration for deployment actions.
+
+    For first deployment of a service, attempts to use all GPUs.
+    For subsequent deployments, uses standard GPU selection (most free memory).
+    Falls back gracefully to standard GPU selection if '--gpus all' is not available.
+
+    Args:
+        action_details (dict): Action details containing GPU requirements
+        is_first_deployment (bool): Whether this is the first deployment for this service
+
+    Returns:
+        str: GPU configuration string ('--gpus all' or '--gpus "device=X"' or '')
+    """
+    action_id = action_details.get("_id", "unknown")
+
+    # Check if GPU is required
+    gpu_required = action_details.get("actionDetails", {}).get("gpuRequired", False)
+    if not gpu_required:
+        logging.info(
+            "Action %s does not require GPU - will run on CPU",
+            action_id
+        )
+        return ""
+
+    # First deployment: try to use all GPUs
+    if is_first_deployment:
+        logging.info(
+            "Action %s: First deployment - attempting to use all GPUs",
+            action_id
+        )
+
+        try:
+            # Check if GPUs are available
+            result = subprocess.run(
+                ["nvidia-smi", "--query-gpu=count", "--format=csv,noheader"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                timeout=5,
+                check=False,
+            )
+
+            if result.returncode == 0 and result.stdout.strip():
+                # GPUs are available, use all of them
+                logging.info(
+                    "Action %s: Using all GPUs for first deployment",
+                    action_id
+                )
+                return '--gpus all'
+            else:
+                logging.warning(
+                    "Action %s: No GPUs detected via nvidia-smi for first deployment, falling back to standard GPU selection",
+                    action_id
+                )
+        except Exception as e:
+            logging.warning(
+                "Action %s: Error checking GPU availability (%s), falling back to standard GPU selection",
+                action_id,
+                str(e)
+            )
+
+    # Fall back to standard GPU selection (most free memory)
+    # This also handles subsequent deployments
+    logging.info(
+        "Action %s: Using standard GPU allocation (most free memory)",
+        action_id
+    )
+
+    required_memory = action_details.get("actionDetails", {}).get(
+        "expectedResources", {}
+    ).get("gpuMemory", 0)
+
+    try:
+        # Get the GPU(s) with most free memory that have sufficient memory
+        gpu_indices = get_gpu_with_sufficient_memory_for_action(
+            action_details=action_details
+        )
+
+        if gpu_indices:
+            gpu_str = ",".join(map(str, gpu_indices))
+            logging.info(
+                "Action %s: Selected GPU device(s): %s (required memory: %d MB)",
+                action_id,
+                gpu_str,
+                required_memory
+            )
+
+            # Return Docker GPU configuration
+            return f'--gpus "device={gpu_str}"'
+        else:
+            logging.warning(
+                "Action %s: No GPUs with sufficient memory found (required: %d MB)",
+                action_id,
+                required_memory
+            )
+            return ""
+
+    except ValueError as e:
+        logging.error(
+            "Action %s: Error selecting GPU - %s",
+            action_id,
+            str(e)
+        )
+        return ""
+    except Exception as e:
+        logging.error(
+            "Action %s: Unexpected error in GPU selection - %s",
+            action_id,
+            str(e)
+        )
+        return ""
+
+
 @log_errors(default_return=(None, None), raise_exception=False)
 def get_decrypted_access_key_pair(
     enc_access_key: str,
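A quick illustration of the three possible return shapes, called with a made-up action payload (field names follow the code above; all values are invented and the import path is assumed from this package layout):

from matrice_compute.instance_utils import get_gpu_config_for_deployment  # assumed path

action_details = {
    "_id": "act-123",
    "actionDetails": {
        "gpuRequired": True,
        "expectedResources": {"gpuMemory": 16000},  # MB, per the logging above
    },
}

# First deployment on a GPU host -> '--gpus all'; if nvidia-smi finds nothing,
# falls through to per-device selection ('--gpus "device=0,1"') or '' for CPU-only.
print(get_gpu_config_for_deployment(action_details, is_first_deployment=True))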