matrice-compute 0.1.24__tar.gz → 0.1.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/PKG-INFO +1 -1
  2. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/PKG-INFO +1 -1
  3. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/action_instance.py +105 -8
  4. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/instance_utils.py +114 -0
  5. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/LICENSE.txt +0 -0
  6. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/README.md +0 -0
  7. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/SOURCES.txt +0 -0
  8. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/dependency_links.txt +0 -0
  9. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/not-zip-safe +0 -0
  10. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/top_level.txt +0 -0
  11. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/pyproject.toml +0 -0
  12. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/setup.cfg +0 -0
  13. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/setup.py +0 -0
  14. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/__init__.py +0 -0
  15. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/actions_manager.py +0 -0
  16. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/actions_scaledown_manager.py +0 -0
  17. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/instance_manager.py +0 -0
  18. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/prechecks.py +0 -0
  19. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/py.typed +0 -0
  20. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/resources_tracker.py +0 -0
  21. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/scaling.py +0 -0
  22. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/shutdown_manager.py +0 -0
  23. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/task_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.24
3
+ Version: 0.1.25
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.24
3
+ Version: 0.1.25
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -10,6 +10,7 @@ import signal
10
10
  import urllib.request
11
11
  from matrice_compute.instance_utils import (
12
12
  get_gpu_with_sufficient_memory_for_action,
13
+ get_gpu_config_for_deployment,
13
14
  get_decrypted_access_key_pair,
14
15
  get_max_file_system,
15
16
  get_best_service_ip_and_network,
@@ -26,6 +27,10 @@ from matrice_common.utils import log_errors
26
27
  class ActionInstance:
27
28
  """Base class for tasks that run in Action containers."""
28
29
 
30
+ # Class-level dictionary to track deployed services and their ports
31
+ # Key: _idService, Value: {"triton_ports": "port1,port2,port3", "is_first": False}
32
+ _deployed_services = {}
33
+
29
34
  def __init__(self, scaling: Scaling, action_info: dict):
30
35
  """Initialize an action instance.
31
36
 
@@ -84,6 +89,67 @@ class ActionInstance:
84
89
  raise ValueError(f"Unknown action type: {self.action_type}")
85
90
  self.task = self.actions_map[self.action_type]
86
91
 
92
+ @classmethod
93
+ def is_first_deployment_for_service(cls, service_id):
94
+ """Check if this is the first deployment for a given service.
95
+
96
+ Args:
97
+ service_id (str): Service ID (_idService)
98
+
99
+ Returns:
100
+ bool: True if this is the first deployment, False otherwise
101
+ """
102
+ if not service_id:
103
+ return False
104
+ return service_id not in cls._deployed_services
105
+
106
+ @classmethod
107
+ def get_or_create_triton_ports(cls, service_id, scaling_instance):
108
+ """Get existing TRITON_PORTS for a service or create new ones.
109
+
110
+ Args:
111
+ service_id (str): Service ID (_idService)
112
+ scaling_instance: Scaling instance to get open ports
113
+
114
+ Returns:
115
+ str: Comma-separated string of 3 port numbers (e.g., "8001,8002,8003")
116
+ """
117
+ if not service_id:
118
+ # No service_id, generate new ports
119
+ port1 = scaling_instance.get_open_port()
120
+ port2 = scaling_instance.get_open_port()
121
+ port3 = scaling_instance.get_open_port()
122
+ return f"{port1},{port2},{port3}"
123
+
124
+ # Check if ports already exist for this service
125
+ if service_id in cls._deployed_services:
126
+ triton_ports = cls._deployed_services[service_id]["triton_ports"]
127
+ logging.info(
128
+ "Reusing TRITON_PORTS for service %s: %s",
129
+ service_id,
130
+ triton_ports
131
+ )
132
+ return triton_ports
133
+
134
+ # First deployment: generate new ports and store them
135
+ port1 = scaling_instance.get_open_port()
136
+ port2 = scaling_instance.get_open_port()
137
+ port3 = scaling_instance.get_open_port()
138
+ triton_ports = f"{port1},{port2},{port3}"
139
+
140
+ # Store for future use
141
+ cls._deployed_services[service_id] = {
142
+ "triton_ports": triton_ports,
143
+ "is_first": False
144
+ }
145
+
146
+ logging.info(
147
+ "First deployment for service %s - generated TRITON_PORTS: %s",
148
+ service_id,
149
+ triton_ports
150
+ )
151
+ return triton_ports
152
+
87
153
  @log_errors(default_return={}, raise_exception=True, log_error=False)
88
154
  def _init_credentials(self):
89
155
  """Initialize Matrice credentials.
@@ -1387,10 +1453,27 @@ def redis_setup_execute(self: ActionInstance):
1387
1453
  f"docker run -d --net=host "
1388
1454
  f"--name redis_container_{int(time.time())} "
1389
1455
  f"--restart unless-stopped "
1456
+ f"--memory=32g "
1457
+ f"--cpus=8 "
1390
1458
  f"{redis_image} "
1391
- f"redis-server --bind 0.0.0.0 --appendonly yes --requirepass {redis_password}"
1459
+ f"redis-server --bind 0.0.0.0 "
1460
+ f"--appendonly no "
1461
+ f'--save "" '
1462
+ f"--maxmemory 30gb "
1463
+ f"--maxmemory-policy allkeys-lru "
1464
+ f"--io-threads 4 "
1465
+ f"--io-threads-do-reads yes "
1466
+ f"--stream-node-max-bytes 8192 "
1467
+ f"--stream-node-max-entries 1000 "
1468
+ f"--hz 100 "
1469
+ f"--tcp-backlog 2048 "
1470
+ f"--timeout 0 "
1471
+ f"--lazyfree-lazy-eviction yes "
1472
+ f"--lazyfree-lazy-expire yes "
1473
+ f"--lazyfree-lazy-server-del yes "
1474
+ f"--activedefrag yes "
1475
+ f"--requirepass {redis_password}"
1392
1476
  )
1393
-
1394
1477
  logging.info("Starting Redis container on %s:6379: %s", redis_host, redis_cmd)
1395
1478
 
1396
1479
  # Start Redis container first
@@ -1455,6 +1538,10 @@ def model_deploy_execute(self: ActionInstance):
1455
1538
  return
1456
1539
  action_id = action_details["_id"]
1457
1540
  model_family = action_details["actionDetails"]["modelFamily"]
1541
+
1542
+ # Get the service ID to track deployments
1543
+ service_id = action_details.get("_idService")
1544
+
1458
1545
  self.setup_action_requirements(
1459
1546
  action_details,
1460
1547
  work_fs,
@@ -1462,17 +1549,27 @@ def model_deploy_execute(self: ActionInstance):
1462
1549
  action_id=action_id,
1463
1550
  )
1464
1551
 
1465
- # Get GPU configuration based on requirements and availability
1466
- # This selects the GPU(s) with the most free memory to balance load
1467
- use_gpu = self.get_gpu_config(action_details)
1552
+ # Check if this is the first deployment for this service
1553
+ is_first_deployment = ActionInstance.is_first_deployment_for_service(service_id)
1554
+
1555
+ # Get GPU configuration (uses utility function with fail-safe fallback)
1556
+ use_gpu = get_gpu_config_for_deployment(action_details, is_first_deployment)
1468
1557
 
1469
1558
  logging.info(
1470
- "Action %s: Model deployment GPU config: %s",
1559
+ "Action %s: Model deployment GPU config: %s (first_deployment=%s)",
1471
1560
  action_id,
1472
- use_gpu if use_gpu else "CPU-only"
1561
+ use_gpu if use_gpu else "CPU-only",
1562
+ is_first_deployment
1473
1563
  )
1474
1564
 
1475
- extra_env_vars = {"INTERNAL_PORT": internal_port}
1565
+ # Get or create TRITON_PORTS (uses utility method)
1566
+ triton_ports = ActionInstance.get_or_create_triton_ports(service_id, self.scaling)
1567
+
1568
+ extra_env_vars = {
1569
+ "INTERNAL_PORT": internal_port,
1570
+ "TRITON_PORTS": triton_ports
1571
+ }
1572
+
1476
1573
  cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, mount_docker_sock=True, action_id=action_id, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_inference", "matrice_analytics"])} python3 deploy.py {self.action_record_id} {external_port}"'
1477
1574
  logging.info("cmd is: %s", cmd)
1478
1575
  self.start(cmd, "deploy_log")
@@ -941,6 +941,120 @@ def get_single_gpu_with_sufficient_memory_for_action(
941
941
  raise ValueError(error_msg)
942
942
 
943
943
 
944
+ @log_errors(default_return="", raise_exception=False)
945
+ def get_gpu_config_for_deployment(action_details, is_first_deployment=False):
946
+ """Get GPU configuration for deployment actions.
947
+
948
+ For first deployment of a service, attempts to use all GPUs.
949
+ For subsequent deployments, uses standard GPU selection (most free memory).
950
+ Falls back gracefully to standard GPU selection if '--gpus all' is not available.
951
+
952
+ Args:
953
+ action_details (dict): Action details containing GPU requirements
954
+ is_first_deployment (bool): Whether this is the first deployment for this service
955
+
956
+ Returns:
957
+ str: GPU configuration string ('--gpus all' or '--gpus "device=X"' or '')
958
+ """
959
+ action_id = action_details.get("_id", "unknown")
960
+
961
+ # Check if GPU is required
962
+ gpu_required = action_details.get("actionDetails", {}).get("gpuRequired", False)
963
+ if not gpu_required:
964
+ logging.info(
965
+ "Action %s does not require GPU - will run on CPU",
966
+ action_id
967
+ )
968
+ return ""
969
+
970
+ # First deployment: try to use all GPUs
971
+ if is_first_deployment:
972
+ logging.info(
973
+ "Action %s: First deployment - attempting to use all GPUs",
974
+ action_id
975
+ )
976
+
977
+ try:
978
+ # Check if GPUs are available
979
+ result = subprocess.run(
980
+ ["nvidia-smi", "--query-gpu=count", "--format=csv,noheader"],
981
+ stdout=subprocess.PIPE,
982
+ stderr=subprocess.PIPE,
983
+ timeout=5,
984
+ check=False,
985
+ )
986
+
987
+ if result.returncode == 0 and result.stdout.strip():
988
+ # GPUs are available, use all of them
989
+ logging.info(
990
+ "Action %s: Using all GPUs for first deployment",
991
+ action_id
992
+ )
993
+ return '--gpus all'
994
+ else:
995
+ logging.warning(
996
+ "Action %s: No GPUs detected via nvidia-smi for first deployment, falling back to standard GPU selection",
997
+ action_id
998
+ )
999
+ except Exception as e:
1000
+ logging.warning(
1001
+ "Action %s: Error checking GPU availability (%s), falling back to standard GPU selection",
1002
+ action_id,
1003
+ str(e)
1004
+ )
1005
+
1006
+ # Fall back to standard GPU selection (most free memory)
1007
+ # This also handles subsequent deployments
1008
+ logging.info(
1009
+ "Action %s: Using standard GPU allocation (most free memory)",
1010
+ action_id
1011
+ )
1012
+
1013
+ required_memory = action_details.get("actionDetails", {}).get(
1014
+ "expectedResources", {}
1015
+ ).get("gpuMemory", 0)
1016
+
1017
+ try:
1018
+ # Get the GPU(s) with most free memory that have sufficient memory
1019
+ gpu_indices = get_gpu_with_sufficient_memory_for_action(
1020
+ action_details=action_details
1021
+ )
1022
+
1023
+ if gpu_indices:
1024
+ gpu_str = ",".join(map(str, gpu_indices))
1025
+ logging.info(
1026
+ "Action %s: Selected GPU device(s): %s (required memory: %d MB)",
1027
+ action_id,
1028
+ gpu_str,
1029
+ required_memory
1030
+ )
1031
+
1032
+ # Return Docker GPU configuration
1033
+ return f'--gpus "device={gpu_str}"'
1034
+ else:
1035
+ logging.warning(
1036
+ "Action %s: No GPUs with sufficient memory found (required: %d MB)",
1037
+ action_id,
1038
+ required_memory
1039
+ )
1040
+ return ""
1041
+
1042
+ except ValueError as e:
1043
+ logging.error(
1044
+ "Action %s: Error selecting GPU - %s",
1045
+ action_id,
1046
+ str(e)
1047
+ )
1048
+ return ""
1049
+ except Exception as e:
1050
+ logging.error(
1051
+ "Action %s: Unexpected error in GPU selection - %s",
1052
+ action_id,
1053
+ str(e)
1054
+ )
1055
+ return ""
1056
+
1057
+
944
1058
  @log_errors(default_return=(None, None), raise_exception=False)
945
1059
  def get_decrypted_access_key_pair(
946
1060
  enc_access_key: str,