matrice-compute 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
matrice_compute/action_instance.py

@@ -10,6 +10,7 @@ import signal
 import urllib.request
 from matrice_compute.instance_utils import (
     get_gpu_with_sufficient_memory_for_action,
+    get_gpu_config_for_deployment,
     get_decrypted_access_key_pair,
     get_max_file_system,
     get_best_service_ip_and_network,
@@ -26,6 +27,10 @@ from matrice_common.utils import log_errors
 class ActionInstance:
     """Base class for tasks that run in Action containers."""

+    # Class-level dictionary to track deployed services and their ports
+    # Key: _idService, Value: {"triton_ports": "port1,port2,port3", "is_first": False}
+    _deployed_services = {}
+
     def __init__(self, scaling: Scaling, action_info: dict):
         """Initialize an action instance.

@@ -84,6 +89,67 @@ class ActionInstance:
             raise ValueError(f"Unknown action type: {self.action_type}")
         self.task = self.actions_map[self.action_type]

+    @classmethod
+    def is_first_deployment_for_service(cls, service_id):
+        """Check if this is the first deployment for a given service.
+
+        Args:
+            service_id (str): Service ID (_idService)
+
+        Returns:
+            bool: True if this is the first deployment, False otherwise
+        """
+        if not service_id:
+            return False
+        return service_id not in cls._deployed_services
+
+    @classmethod
+    def get_or_create_triton_ports(cls, service_id, scaling_instance):
+        """Get existing TRITON_PORTS for a service or create new ones.
+
+        Args:
+            service_id (str): Service ID (_idService)
+            scaling_instance: Scaling instance to get open ports
+
+        Returns:
+            str: Comma-separated string of 3 port numbers (e.g., "8001,8002,8003")
+        """
+        if not service_id:
+            # No service_id, generate new ports
+            port1 = scaling_instance.get_open_port()
+            port2 = scaling_instance.get_open_port()
+            port3 = scaling_instance.get_open_port()
+            return f"{port1},{port2},{port3}"
+
+        # Check if ports already exist for this service
+        if service_id in cls._deployed_services:
+            triton_ports = cls._deployed_services[service_id]["triton_ports"]
+            logging.info(
+                "Reusing TRITON_PORTS for service %s: %s",
+                service_id,
+                triton_ports
+            )
+            return triton_ports
+
+        # First deployment: generate new ports and store them
+        port1 = scaling_instance.get_open_port()
+        port2 = scaling_instance.get_open_port()
+        port3 = scaling_instance.get_open_port()
+        triton_ports = f"{port1},{port2},{port3}"
+
+        # Store for future use
+        cls._deployed_services[service_id] = {
+            "triton_ports": triton_ports,
+            "is_first": False
+        }
+
+        logging.info(
+            "First deployment for service %s - generated TRITON_PORTS: %s",
+            service_id,
+            triton_ports
+        )
+        return triton_ports
+
     @log_errors(default_return={}, raise_exception=True, log_error=False)
     def _init_credentials(self):
         """Initialize Matrice credentials.
@@ -285,13 +351,13 @@ class ActionInstance:
         ).get("gpuMemory", 0)

         logging.info(
-            "Action %s requires GPU with %d MB memory - selecting best-fit GPU(s)",
+            "Action %s requires GPU with %d MB memory - selecting GPU(s) with most free memory",
             action_id,
             required_memory
         )

         try:
-            # Get the best-fit GPU(s) with sufficient memory
+            # Get the GPU(s) with most free memory that have sufficient memory
             gpu_indices = get_gpu_with_sufficient_memory_for_action(
                 action_details=action_details
             )
@@ -1387,10 +1453,27 @@ def redis_setup_execute(self: ActionInstance):
         f"docker run -d --net=host "
         f"--name redis_container_{int(time.time())} "
         f"--restart unless-stopped "
+        f"--memory=32g "
+        f"--cpus=8 "
         f"{redis_image} "
-        f"redis-server --bind 0.0.0.0 --appendonly yes --requirepass {redis_password}"
+        f"redis-server --bind 0.0.0.0 "
+        f"--appendonly no "
+        f'--save "" '
+        f"--maxmemory 30gb "
+        f"--maxmemory-policy allkeys-lru "
+        f"--io-threads 4 "
+        f"--io-threads-do-reads yes "
+        f"--stream-node-max-bytes 8192 "
+        f"--stream-node-max-entries 1000 "
+        f"--hz 100 "
+        f"--tcp-backlog 2048 "
+        f"--timeout 0 "
+        f"--lazyfree-lazy-eviction yes "
+        f"--lazyfree-lazy-expire yes "
+        f"--lazyfree-lazy-server-del yes "
+        f"--activedefrag yes "
+        f"--requirepass {redis_password}"
     )
-
     logging.info("Starting Redis container on %s:6379: %s", redis_host, redis_cmd)

     # Start Redis container first
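Note: the rewritten command reconfigures Redis as a pure in-memory cache: persistence is disabled (`--appendonly no`, `--save ""`), memory is capped at 30 GB with `allkeys-lru` eviction inside the 32 GB container limit, and I/O threads plus lazy freeing trade background CPU for lower tail latency. A quick way to confirm the live settings, sketched with the redis-py client (assumed installed; `<redis_password>` is a placeholder for the value used above):

```python
import redis

# Connect to the container started by redis_setup_execute (assumes localhost).
r = redis.Redis(host="localhost", port=6379, password="<redis_password>")

# CONFIG GET returns the values set by the command-line flags.
print(r.config_get("maxmemory"))         # {'maxmemory': '32212254720'}, i.e. 30gb
print(r.config_get("maxmemory-policy"))  # {'maxmemory-policy': 'allkeys-lru'}
print(r.config_get("appendonly"))        # {'appendonly': 'no'}
```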
@@ -1455,6 +1538,10 @@ def model_deploy_execute(self: ActionInstance):
         return
     action_id = action_details["_id"]
     model_family = action_details["actionDetails"]["modelFamily"]
+
+    # Get the service ID to track deployments
+    service_id = action_details.get("_idService")
+
     self.setup_action_requirements(
         action_details,
         work_fs,
@@ -1462,17 +1549,27 @@ def model_deploy_execute(self: ActionInstance):
         action_id=action_id,
     )

-    # Get GPU configuration based on requirements and availability
-    # This uses the best-fit algorithm to select the most appropriate GPU(s)
-    use_gpu = self.get_gpu_config(action_details)
+    # Check if this is the first deployment for this service
+    is_first_deployment = ActionInstance.is_first_deployment_for_service(service_id)
+
+    # Get GPU configuration (uses utility function with fail-safe fallback)
+    use_gpu = get_gpu_config_for_deployment(action_details, is_first_deployment)

     logging.info(
-        "Action %s: Model deployment GPU config: %s",
+        "Action %s: Model deployment GPU config: %s (first_deployment=%s)",
         action_id,
-        use_gpu if use_gpu else "CPU-only"
+        use_gpu if use_gpu else "CPU-only",
+        is_first_deployment
     )

-    extra_env_vars = {"INTERNAL_PORT": internal_port}
+    # Get or create TRITON_PORTS (uses utility method)
+    triton_ports = ActionInstance.get_or_create_triton_ports(service_id, self.scaling)
+
+    extra_env_vars = {
+        "INTERNAL_PORT": internal_port,
+        "TRITON_PORTS": triton_ports
+    }
+
     cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, mount_docker_sock=True, action_id=action_id, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_inference", "matrice_analytics"])} python3 deploy.py {self.action_record_id} {external_port}"'
     logging.info("cmd is: %s", cmd)
     self.start(cmd, "deploy_log")
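Note: `extra_env_vars` is handed to `get_base_docker_cmd`, which presumably injects each entry into the container environment, so the deploy container sees `INTERNAL_PORT` and `TRITON_PORTS`. How the container side might read them (a sketch; the actual consumer lives in the `matrice_inference` image, which this diff does not show, and the HTTP/gRPC/metrics split is an assumption based on Triton's usual three endpoints):

```python
import os

internal_port = int(os.environ["INTERNAL_PORT"])
# Three comma-separated ports, stable across redeployments of the same service.
http_port, grpc_port, metrics_port = (
    int(p) for p in os.environ["TRITON_PORTS"].split(",")
)
```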
matrice_compute/instance_utils.py

@@ -600,13 +600,18 @@ def is_allowed_gpu_device(gpu_index: int) -> bool:
     Returns:
         bool: True if GPU is allowed (or no filter is set), False otherwise
     """
-    gpus = os.environ.get("GPUS")
-    if not gpus:
-        # No filter set - all GPUs are allowed
+    gpus = os.environ.get("GPUS", "").strip()
+    # No filter set or empty string - all GPUs are allowed
+    if not gpus or gpus == '""' or gpus == "''":
         return True

     try:
-        allowed_gpus = [int(x) for x in gpus.split(",") if x.strip()]
+        allowed_gpus = [int(x.strip()) for x in gpus.split(",") if x.strip()]
+
+        # If no valid GPUs after parsing, allow all
+        if not allowed_gpus:
+            return True
+
         is_allowed = int(gpu_index) in allowed_gpus

         if not is_allowed:
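Note: the hardened parser now treats an empty, quoted-empty, or unparseable GPUS filter as "allow everything" instead of crashing on `int('""')`. Expected behavior, sketched as assertions against the function above:

```python
import os
from matrice_compute.instance_utils import is_allowed_gpu_device

for value, expected in [
    ("", True),         # unset/empty -> no filter, all GPUs allowed
    ('""', True),       # literal quoted-empty string -> all allowed
    (",,", True),       # nothing parseable -> all allowed
    ("0,1", True),      # GPU 0 is listed
    (" 0 , 1 ", True),  # stray whitespace tolerated (now explicit via strip())
    ("1", False),       # GPU 0 is not in the filter
]:
    os.environ["GPUS"] = value
    assert is_allowed_gpu_device(0) == expected
```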
@@ -727,14 +732,14 @@ def get_gpu_with_sufficient_memory_for_action(
     # For smaller memory requirements, try to fit on a single GPU first
     if required_gpu_memory < 80000:
         logging.debug(
-            "Action %s: Required memory %d MB < 80000 MB - attempting single GPU allocation",
+            "Action %s: Required memory %d MB < 80000 MB - attempting single GPU allocation (selecting GPU with most free memory)",
             action_id,
             required_gpu_memory
         )
         try:
             single_gpu = get_single_gpu_with_sufficient_memory_for_action(action_details)
             logging.info(
-                "Action %s: Successfully allocated single GPU: %s",
+                "Action %s: Successfully allocated single GPU with most free memory: %s",
                 action_id,
                 single_gpu
             )
@@ -800,10 +805,10 @@ def get_single_gpu_with_sufficient_memory_for_action(
     action_details: dict,
 ) -> list:
     """
-    Get single GPU with sufficient memory using best-fit algorithm.
+    Get single GPU with sufficient memory using most-free algorithm.

-    Best-fit selects the GPU with the smallest amount of free memory
-    that still meets the requirements, minimizing fragmentation.
+    Selects the GPU with the MOST free memory that meets the requirements,
+    to balance load across GPUs and prevent any single GPU from being overused.

     Args:
         action_details (dict): Action details
@@ -818,7 +823,7 @@ def get_single_gpu_with_sufficient_memory_for_action(
     required_gpu_memory = get_required_gpu_memory(action_details)

     logging.debug(
-        "Action %s: Finding best-fit single GPU for %d MB",
+        "Action %s: Finding GPU with most free memory for %d MB",
         action_id,
         required_gpu_memory
     )
@@ -862,9 +867,9 @@ def get_single_gpu_with_sufficient_memory_for_action(
     if not memory_free_values:
         raise ValueError("No GPU devices found")

-    # Best-fit algorithm: find GPU with minimum free memory that meets requirement
+    # Most-free algorithm: find GPU with MAXIMUM free memory that meets requirement
     best_fit_gpu = None
-    best_fit_memory = float("inf")
+    best_fit_memory = 0  # Changed from float("inf") to 0

     for i, mem in enumerate(memory_free_values):
         # Check if GPU is in allowed list
@@ -887,12 +892,12 @@ def get_single_gpu_with_sufficient_memory_for_action(
             required_gpu_memory
         )

-        # Best-fit: choose GPU with smallest sufficient memory
-        if mem < best_fit_memory:
+        # Most-free: choose GPU with MOST free memory to balance load
+        if mem > best_fit_memory:  # Changed from < to >
             best_fit_gpu = i
             best_fit_memory = mem
             logging.debug(
-                "Action %s: GPU %d is new best-fit candidate",
+                "Action %s: GPU %d is new best candidate (most free memory)",
                 action_id,
                 i
             )
@@ -907,7 +912,7 @@ def get_single_gpu_with_sufficient_memory_for_action(

     if best_fit_gpu is not None:
         logging.info(
-            "Action %s: Selected best-fit GPU %d with %d MB free (required: %d MB, waste: %d MB)",
+            "Action %s: Selected GPU %d with most free memory: %d MB free (required: %d MB, available: %d MB)",
             action_id,
             best_fit_gpu,
             best_fit_memory,
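Note: taken together, these hunks flip single-GPU selection from best-fit (smallest sufficient free memory, which minimizes fragmentation) to most-free (largest free memory, which spreads load across devices). The rule, condensed to a sketch of the loop's core logic:

```python
def pick_most_free_gpu(memory_free_mb, required_mb, is_allowed=lambda i: True):
    """Return the index of the allowed GPU with the most free memory, or None."""
    best_gpu, best_free = None, 0
    for i, free_mb in enumerate(memory_free_mb):
        if not is_allowed(i) or free_mb < required_mb:
            continue  # filtered out by GPUS, or insufficient memory
        if free_mb > best_free:  # most-free; best-fit compared "<" against inf
            best_gpu, best_free = i, free_mb
    return best_gpu

# With 30 GB and 50 GB free and a 20 GB requirement, best-fit chose GPU 0;
# most-free chooses GPU 1.
assert pick_most_free_gpu([30000, 50000], 20000) == 1
```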
@@ -936,6 +941,120 @@ def get_single_gpu_with_sufficient_memory_for_action(
     raise ValueError(error_msg)


+@log_errors(default_return="", raise_exception=False)
+def get_gpu_config_for_deployment(action_details, is_first_deployment=False):
+    """Get GPU configuration for deployment actions.
+
+    For first deployment of a service, attempts to use all GPUs.
+    For subsequent deployments, uses standard GPU selection (most free memory).
+    Falls back gracefully to standard GPU selection if '--gpus all' is not available.
+
+    Args:
+        action_details (dict): Action details containing GPU requirements
+        is_first_deployment (bool): Whether this is the first deployment for this service
+
+    Returns:
+        str: GPU configuration string ('--gpus all' or '--gpus "device=X"' or '')
+    """
+    action_id = action_details.get("_id", "unknown")
+
+    # Check if GPU is required
+    gpu_required = action_details.get("actionDetails", {}).get("gpuRequired", False)
+    if not gpu_required:
+        logging.info(
+            "Action %s does not require GPU - will run on CPU",
+            action_id
+        )
+        return ""
+
+    # First deployment: try to use all GPUs
+    if is_first_deployment:
+        logging.info(
+            "Action %s: First deployment - attempting to use all GPUs",
+            action_id
+        )
+
+        try:
+            # Check if GPUs are available
+            result = subprocess.run(
+                ["nvidia-smi", "--query-gpu=count", "--format=csv,noheader"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                timeout=5,
+                check=False,
+            )
+
+            if result.returncode == 0 and result.stdout.strip():
+                # GPUs are available, use all of them
+                logging.info(
+                    "Action %s: Using all GPUs for first deployment",
+                    action_id
+                )
+                return '--gpus all'
+            else:
+                logging.warning(
+                    "Action %s: No GPUs detected via nvidia-smi for first deployment, falling back to standard GPU selection",
+                    action_id
+                )
+        except Exception as e:
+            logging.warning(
+                "Action %s: Error checking GPU availability (%s), falling back to standard GPU selection",
+                action_id,
+                str(e)
+            )
+
+    # Fall back to standard GPU selection (most free memory)
+    # This also handles subsequent deployments
+    logging.info(
+        "Action %s: Using standard GPU allocation (most free memory)",
+        action_id
+    )
+
+    required_memory = action_details.get("actionDetails", {}).get(
+        "expectedResources", {}
+    ).get("gpuMemory", 0)
+
+    try:
+        # Get the GPU(s) with most free memory that have sufficient memory
+        gpu_indices = get_gpu_with_sufficient_memory_for_action(
+            action_details=action_details
+        )
+
+        if gpu_indices:
+            gpu_str = ",".join(map(str, gpu_indices))
+            logging.info(
+                "Action %s: Selected GPU device(s): %s (required memory: %d MB)",
+                action_id,
+                gpu_str,
+                required_memory
+            )
+
+            # Return Docker GPU configuration
+            return f'--gpus "device={gpu_str}"'
+        else:
+            logging.warning(
+                "Action %s: No GPUs with sufficient memory found (required: %d MB)",
+                action_id,
+                required_memory
+            )
+            return ""
+
+    except ValueError as e:
+        logging.error(
+            "Action %s: Error selecting GPU - %s",
+            action_id,
+            str(e)
+        )
+        return ""
+    except Exception as e:
+        logging.error(
+            "Action %s: Unexpected error in GPU selection - %s",
+            action_id,
+            str(e)
+        )
+        return ""
+
+
 @log_errors(default_return=(None, None), raise_exception=False)
 def get_decrypted_access_key_pair(
     enc_access_key: str,
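Note: `get_gpu_config_for_deployment` has three possible outcomes: `'--gpus all'` (first deployment, GPUs visible to nvidia-smi), `'--gpus "device=X"'` (repeat deployments and the fallback path, via most-free selection), or `''` (no GPU required, none suitable, or an error swallowed by the `@log_errors` default). A usage sketch with an abbreviated, hypothetical `action_details`:

```python
action_details = {
    "_id": "act-123",        # hypothetical IDs for illustration
    "_idService": "svc-1",
    "actionDetails": {
        "gpuRequired": True,
        "expectedResources": {"gpuMemory": 16000},  # MB
    },
}

# First deployment: '--gpus all' when nvidia-smi reports GPUs, otherwise
# falls through to most-free selection, e.g. '--gpus "device=2"'.
use_gpu = get_gpu_config_for_deployment(action_details, is_first_deployment=True)

# Repeat deployments skip the all-GPUs path; "" means run on CPU, and the
# docker command simply omits the --gpus flag.
use_gpu = get_gpu_config_for_deployment(action_details, is_first_deployment=False)
```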
matrice_compute-0.1.23.dist-info/METADATA → matrice_compute-0.1.25.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: matrice_compute
-Version: 0.1.23
+Version: 0.1.25
 Summary: Common server utilities for Matrice.ai services
 Author-email: "Matrice.ai" <dipendra@matrice.ai>
 License-Expression: MIT
matrice_compute-0.1.23.dist-info/RECORD → matrice_compute-0.1.25.dist-info/RECORD

@@ -1,17 +1,17 @@
 matrice_compute/__init__.py,sha256=ZzQcFsT005VCgq9VZUh565f4upOooEb_FwZ6RgweNZs,597
-matrice_compute/action_instance.py,sha256=kByPDNzmq93RBhVdnhTqGRLj7JleKFnH9hnIoJo966o,66215
+matrice_compute/action_instance.py,sha256=SYUZrfj6dtcgEjeEgCyKlrc2p2o08jlW84Y__V4Aqew,69552
 matrice_compute/actions_manager.py,sha256=5U-xM6tl_Z6x96bi-c7AJM9ru80LqTN8f5Oce8dAu_A,7780
 matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
 matrice_compute/instance_manager.py,sha256=8USyX09ZxLvnVNIrjRogbyUeMCfgWnasuRqYkkVF4tQ,10146
-matrice_compute/instance_utils.py,sha256=tCI_A3L5iohw62acmlXuOJns0DjIkvwN4znlUAIkfbg,37863
+matrice_compute/instance_utils.py,sha256=N4yPDvNukFEEBngR0lEt4x_XT5hur1q0P-spM2xQIlU,42025
 matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
 matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 matrice_compute/resources_tracker.py,sha256=pkdt0aVKx_TpY_Sq---73w9INkDffZZe3mZGlp1EftE,22573
 matrice_compute/scaling.py,sha256=CeT_lxJNkjJamRETG1lWaOtdSr5ySmcaMcqt7-lFRbo,23731
 matrice_compute/shutdown_manager.py,sha256=0MYV_AqygqR9NEntYf7atUC-PbWXyNkm1f-8c2aizgA,13234
 matrice_compute/task_utils.py,sha256=3qIutiQdYPyGRxH9ZwLbqdg8sZcnp6jp08pszWCRFl0,2820
-matrice_compute-0.1.23.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
-matrice_compute-0.1.23.dist-info/METADATA,sha256=7FCjLIs4y-5IfN9P8FRdcSbIZhPbeOC8Cg9ZSCUWr6o,1038
-matrice_compute-0.1.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-matrice_compute-0.1.23.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
-matrice_compute-0.1.23.dist-info/RECORD,,
+matrice_compute-0.1.25.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
+matrice_compute-0.1.25.dist-info/METADATA,sha256=YxPD7gjTuET4wsbq0ywgIw8AmR8U7-EdAuZlIVIramg,1038
+matrice_compute-0.1.25.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+matrice_compute-0.1.25.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
+matrice_compute-0.1.25.dist-info/RECORD,,