matrice-compute 0.1.24__tar.gz → 0.1.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/PKG-INFO +1 -1
  2. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/PKG-INFO +1 -1
  3. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/action_instance.py +105 -8
  4. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/instance_utils.py +114 -0
  5. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/LICENSE.txt +0 -0
  6. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/README.md +0 -0
  7. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/SOURCES.txt +0 -0
  8. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/dependency_links.txt +0 -0
  9. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/not-zip-safe +0 -0
  10. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/matrice_compute.egg-info/top_level.txt +0 -0
  11. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/pyproject.toml +0 -0
  12. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/setup.cfg +0 -0
  13. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/setup.py +0 -0
  14. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/__init__.py +0 -0
  15. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/actions_manager.py +0 -0
  16. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/actions_scaledown_manager.py +0 -0
  17. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/instance_manager.py +0 -0
  18. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/prechecks.py +0 -0
  19. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/py.typed +0 -0
  20. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/resources_tracker.py +0 -0
  21. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/scaling.py +0 -0
  22. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/shutdown_manager.py +0 -0
  23. {matrice_compute-0.1.24 → matrice_compute-0.1.25}/src/matrice_compute/task_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.24
3
+ Version: 0.1.25
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.24
3
+ Version: 0.1.25
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -10,6 +10,7 @@ import signal
10
10
  import urllib.request
11
11
  from matrice_compute.instance_utils import (
12
12
  get_gpu_with_sufficient_memory_for_action,
13
+ get_gpu_config_for_deployment,
13
14
  get_decrypted_access_key_pair,
14
15
  get_max_file_system,
15
16
  get_best_service_ip_and_network,
@@ -26,6 +27,10 @@ from matrice_common.utils import log_errors
26
27
  class ActionInstance:
27
28
  """Base class for tasks that run in Action containers."""
28
29
 
30
+ # Class-level dictionary to track deployed services and their ports
31
+ # Key: _idService, Value: {"triton_ports": "port1,port2,port3", "is_first": False}
32
+ _deployed_services = {}
33
+
29
34
  def __init__(self, scaling: Scaling, action_info: dict):
30
35
  """Initialize an action instance.
31
36
 
@@ -84,6 +89,67 @@ class ActionInstance:
84
89
  raise ValueError(f"Unknown action type: {self.action_type}")
85
90
  self.task = self.actions_map[self.action_type]
86
91
 
92
+ @classmethod
93
+ def is_first_deployment_for_service(cls, service_id):
94
+ """Check if this is the first deployment for a given service.
95
+
96
+ Args:
97
+ service_id (str): Service ID (_idService)
98
+
99
+ Returns:
100
+ bool: True if this is the first deployment, False otherwise
101
+ """
102
+ if not service_id:
103
+ return False
104
+ return service_id not in cls._deployed_services
105
+
106
+ @classmethod
107
+ def get_or_create_triton_ports(cls, service_id, scaling_instance):
108
+ """Get existing TRITON_PORTS for a service or create new ones.
109
+
110
+ Args:
111
+ service_id (str): Service ID (_idService)
112
+ scaling_instance: Scaling instance to get open ports
113
+
114
+ Returns:
115
+ str: Comma-separated string of 3 port numbers (e.g., "8001,8002,8003")
116
+ """
117
+ if not service_id:
118
+ # No service_id, generate new ports
119
+ port1 = scaling_instance.get_open_port()
120
+ port2 = scaling_instance.get_open_port()
121
+ port3 = scaling_instance.get_open_port()
122
+ return f"{port1},{port2},{port3}"
123
+
124
+ # Check if ports already exist for this service
125
+ if service_id in cls._deployed_services:
126
+ triton_ports = cls._deployed_services[service_id]["triton_ports"]
127
+ logging.info(
128
+ "Reusing TRITON_PORTS for service %s: %s",
129
+ service_id,
130
+ triton_ports
131
+ )
132
+ return triton_ports
133
+
134
+ # First deployment: generate new ports and store them
135
+ port1 = scaling_instance.get_open_port()
136
+ port2 = scaling_instance.get_open_port()
137
+ port3 = scaling_instance.get_open_port()
138
+ triton_ports = f"{port1},{port2},{port3}"
139
+
140
+ # Store for future use
141
+ cls._deployed_services[service_id] = {
142
+ "triton_ports": triton_ports,
143
+ "is_first": False
144
+ }
145
+
146
+ logging.info(
147
+ "First deployment for service %s - generated TRITON_PORTS: %s",
148
+ service_id,
149
+ triton_ports
150
+ )
151
+ return triton_ports
152
+
87
153
  @log_errors(default_return={}, raise_exception=True, log_error=False)
88
154
  def _init_credentials(self):
89
155
  """Initialize Matrice credentials.
@@ -1387,10 +1453,27 @@ def redis_setup_execute(self: ActionInstance):
1387
1453
  f"docker run -d --net=host "
1388
1454
  f"--name redis_container_{int(time.time())} "
1389
1455
  f"--restart unless-stopped "
1456
+ f"--memory=32g "
1457
+ f"--cpus=8 "
1390
1458
  f"{redis_image} "
1391
- f"redis-server --bind 0.0.0.0 --appendonly yes --requirepass {redis_password}"
1459
+ f"redis-server --bind 0.0.0.0 "
1460
+ f"--appendonly no "
1461
+ f'--save "" '
1462
+ f"--maxmemory 30gb "
1463
+ f"--maxmemory-policy allkeys-lru "
1464
+ f"--io-threads 4 "
1465
+ f"--io-threads-do-reads yes "
1466
+ f"--stream-node-max-bytes 8192 "
1467
+ f"--stream-node-max-entries 1000 "
1468
+ f"--hz 100 "
1469
+ f"--tcp-backlog 2048 "
1470
+ f"--timeout 0 "
1471
+ f"--lazyfree-lazy-eviction yes "
1472
+ f"--lazyfree-lazy-expire yes "
1473
+ f"--lazyfree-lazy-server-del yes "
1474
+ f"--activedefrag yes "
1475
+ f"--requirepass {redis_password}"
1392
1476
  )
1393
-
1394
1477
  logging.info("Starting Redis container on %s:6379: %s", redis_host, redis_cmd)
1395
1478
 
1396
1479
  # Start Redis container first
@@ -1455,6 +1538,10 @@ def model_deploy_execute(self: ActionInstance):
1455
1538
  return
1456
1539
  action_id = action_details["_id"]
1457
1540
  model_family = action_details["actionDetails"]["modelFamily"]
1541
+
1542
+ # Get the service ID to track deployments
1543
+ service_id = action_details.get("_idService")
1544
+
1458
1545
  self.setup_action_requirements(
1459
1546
  action_details,
1460
1547
  work_fs,
@@ -1462,17 +1549,27 @@ def model_deploy_execute(self: ActionInstance):
1462
1549
  action_id=action_id,
1463
1550
  )
1464
1551
 
1465
- # Get GPU configuration based on requirements and availability
1466
- # This selects the GPU(s) with the most free memory to balance load
1467
- use_gpu = self.get_gpu_config(action_details)
1552
+ # Check if this is the first deployment for this service
1553
+ is_first_deployment = ActionInstance.is_first_deployment_for_service(service_id)
1554
+
1555
+ # Get GPU configuration (uses utility function with fail-safe fallback)
1556
+ use_gpu = get_gpu_config_for_deployment(action_details, is_first_deployment)
1468
1557
 
1469
1558
  logging.info(
1470
- "Action %s: Model deployment GPU config: %s",
1559
+ "Action %s: Model deployment GPU config: %s (first_deployment=%s)",
1471
1560
  action_id,
1472
- use_gpu if use_gpu else "CPU-only"
1561
+ use_gpu if use_gpu else "CPU-only",
1562
+ is_first_deployment
1473
1563
  )
1474
1564
 
1475
- extra_env_vars = {"INTERNAL_PORT": internal_port}
1565
+ # Get or create TRITON_PORTS (uses utility method)
1566
+ triton_ports = ActionInstance.get_or_create_triton_ports(service_id, self.scaling)
1567
+
1568
+ extra_env_vars = {
1569
+ "INTERNAL_PORT": internal_port,
1570
+ "TRITON_PORTS": triton_ports
1571
+ }
1572
+
1476
1573
  cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, mount_docker_sock=True, action_id=action_id, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_inference", "matrice_analytics"])} python3 deploy.py {self.action_record_id} {external_port}"'
1477
1574
  logging.info("cmd is: %s", cmd)
1478
1575
  self.start(cmd, "deploy_log")
@@ -941,6 +941,120 @@ def get_single_gpu_with_sufficient_memory_for_action(
941
941
  raise ValueError(error_msg)
942
942
 
943
943
 
944
+ @log_errors(default_return="", raise_exception=False)
945
+ def get_gpu_config_for_deployment(action_details, is_first_deployment=False):
946
+ """Get GPU configuration for deployment actions.
947
+
948
+ For first deployment of a service, attempts to use all GPUs.
949
+ For subsequent deployments, uses standard GPU selection (most free memory).
950
+ Falls back gracefully to standard GPU selection if '--gpus all' is not available.
951
+
952
+ Args:
953
+ action_details (dict): Action details containing GPU requirements
954
+ is_first_deployment (bool): Whether this is the first deployment for this service
955
+
956
+ Returns:
957
+ str: GPU configuration string ('--gpus all' or '--gpus "device=X"' or '')
958
+ """
959
+ action_id = action_details.get("_id", "unknown")
960
+
961
+ # Check if GPU is required
962
+ gpu_required = action_details.get("actionDetails", {}).get("gpuRequired", False)
963
+ if not gpu_required:
964
+ logging.info(
965
+ "Action %s does not require GPU - will run on CPU",
966
+ action_id
967
+ )
968
+ return ""
969
+
970
+ # First deployment: try to use all GPUs
971
+ if is_first_deployment:
972
+ logging.info(
973
+ "Action %s: First deployment - attempting to use all GPUs",
974
+ action_id
975
+ )
976
+
977
+ try:
978
+ # Check if GPUs are available
979
+ result = subprocess.run(
980
+ ["nvidia-smi", "--query-gpu=count", "--format=csv,noheader"],
981
+ stdout=subprocess.PIPE,
982
+ stderr=subprocess.PIPE,
983
+ timeout=5,
984
+ check=False,
985
+ )
986
+
987
+ if result.returncode == 0 and result.stdout.strip():
988
+ # GPUs are available, use all of them
989
+ logging.info(
990
+ "Action %s: Using all GPUs for first deployment",
991
+ action_id
992
+ )
993
+ return '--gpus all'
994
+ else:
995
+ logging.warning(
996
+ "Action %s: No GPUs detected via nvidia-smi for first deployment, falling back to standard GPU selection",
997
+ action_id
998
+ )
999
+ except Exception as e:
1000
+ logging.warning(
1001
+ "Action %s: Error checking GPU availability (%s), falling back to standard GPU selection",
1002
+ action_id,
1003
+ str(e)
1004
+ )
1005
+
1006
+ # Fall back to standard GPU selection (most free memory)
1007
+ # This also handles subsequent deployments
1008
+ logging.info(
1009
+ "Action %s: Using standard GPU allocation (most free memory)",
1010
+ action_id
1011
+ )
1012
+
1013
+ required_memory = action_details.get("actionDetails", {}).get(
1014
+ "expectedResources", {}
1015
+ ).get("gpuMemory", 0)
1016
+
1017
+ try:
1018
+ # Get the GPU(s) with most free memory that have sufficient memory
1019
+ gpu_indices = get_gpu_with_sufficient_memory_for_action(
1020
+ action_details=action_details
1021
+ )
1022
+
1023
+ if gpu_indices:
1024
+ gpu_str = ",".join(map(str, gpu_indices))
1025
+ logging.info(
1026
+ "Action %s: Selected GPU device(s): %s (required memory: %d MB)",
1027
+ action_id,
1028
+ gpu_str,
1029
+ required_memory
1030
+ )
1031
+
1032
+ # Return Docker GPU configuration
1033
+ return f'--gpus "device={gpu_str}"'
1034
+ else:
1035
+ logging.warning(
1036
+ "Action %s: No GPUs with sufficient memory found (required: %d MB)",
1037
+ action_id,
1038
+ required_memory
1039
+ )
1040
+ return ""
1041
+
1042
+ except ValueError as e:
1043
+ logging.error(
1044
+ "Action %s: Error selecting GPU - %s",
1045
+ action_id,
1046
+ str(e)
1047
+ )
1048
+ return ""
1049
+ except Exception as e:
1050
+ logging.error(
1051
+ "Action %s: Unexpected error in GPU selection - %s",
1052
+ action_id,
1053
+ str(e)
1054
+ )
1055
+ return ""
1056
+
1057
+
944
1058
  @log_errors(default_return=(None, None), raise_exception=False)
945
1059
  def get_decrypted_access_key_pair(
946
1060
  enc_access_key: str,