matrice-compute 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
matrice_compute/action_instance.py

@@ -10,6 +10,7 @@ import signal
 import urllib.request
 from matrice_compute.instance_utils import (
     get_gpu_with_sufficient_memory_for_action,
+    get_gpu_config_for_deployment,
     get_decrypted_access_key_pair,
     get_max_file_system,
     get_best_service_ip_and_network,
@@ -26,6 +27,10 @@ from matrice_common.utils import log_errors
 class ActionInstance:
     """Base class for tasks that run in Action containers."""

+    # Class-level dictionary to track deployed services and their ports
+    # Key: _idService, Value: {"triton_ports": "port1,port2,port3", "is_first": False}
+    _deployed_services = {}
+
     def __init__(self, scaling: Scaling, action_info: dict):
         """Initialize an action instance.

@@ -84,6 +89,67 @@ class ActionInstance:
             raise ValueError(f"Unknown action type: {self.action_type}")
         self.task = self.actions_map[self.action_type]

+    @classmethod
+    def is_first_deployment_for_service(cls, service_id):
+        """Check if this is the first deployment for a given service.
+
+        Args:
+            service_id (str): Service ID (_idService)
+
+        Returns:
+            bool: True if this is the first deployment, False otherwise
+        """
+        if not service_id:
+            return False
+        return service_id not in cls._deployed_services
+
+    @classmethod
+    def get_or_create_triton_ports(cls, service_id, scaling_instance):
+        """Get existing TRITON_PORTS for a service or create new ones.
+
+        Args:
+            service_id (str): Service ID (_idService)
+            scaling_instance: Scaling instance to get open ports
+
+        Returns:
+            str: Comma-separated string of 3 port numbers (e.g., "8001,8002,8003")
+        """
+        if not service_id:
+            # No service_id, generate new ports
+            port1 = scaling_instance.get_open_port()
+            port2 = scaling_instance.get_open_port()
+            port3 = scaling_instance.get_open_port()
+            return f"{port1},{port2},{port3}"
+
+        # Check if ports already exist for this service
+        if service_id in cls._deployed_services:
+            triton_ports = cls._deployed_services[service_id]["triton_ports"]
+            logging.info(
+                "Reusing TRITON_PORTS for service %s: %s",
+                service_id,
+                triton_ports
+            )
+            return triton_ports
+
+        # First deployment: generate new ports and store them
+        port1 = scaling_instance.get_open_port()
+        port2 = scaling_instance.get_open_port()
+        port3 = scaling_instance.get_open_port()
+        triton_ports = f"{port1},{port2},{port3}"
+
+        # Store for future use
+        cls._deployed_services[service_id] = {
+            "triton_ports": triton_ports,
+            "is_first": False
+        }
+
+        logging.info(
+            "First deployment for service %s - generated TRITON_PORTS: %s",
+            service_id,
+            triton_ports
+        )
+        return triton_ports
+
     @log_errors(default_return={}, raise_exception=True, log_error=False)
     def _init_credentials(self):
         """Initialize Matrice credentials.
@@ -285,13 +351,13 @@ class ActionInstance:
         ).get("gpuMemory", 0)

         logging.info(
-            "Action %s requires GPU with %d MB memory - selecting best-fit GPU(s)",
+            "Action %s requires GPU with %d MB memory - selecting GPU(s) with most free memory",
             action_id,
             required_memory
         )

         try:
-            # Get the best-fit GPU(s) with sufficient memory
+            # Get the GPU(s) with most free memory that have sufficient memory
             gpu_indices = get_gpu_with_sufficient_memory_for_action(
                 action_details=action_details
             )
@@ -1387,10 +1453,27 @@ def redis_setup_execute(self: ActionInstance):
         f"docker run -d --net=host "
         f"--name redis_container_{int(time.time())} "
         f"--restart unless-stopped "
+        f"--memory=32g "
+        f"--cpus=8 "
         f"{redis_image} "
-        f"redis-server --bind 0.0.0.0 --appendonly yes --requirepass {redis_password}"
+        f"redis-server --bind 0.0.0.0 "
+        f"--appendonly no "
+        f'--save "" '
+        f"--maxmemory 30gb "
+        f"--maxmemory-policy allkeys-lru "
+        f"--io-threads 4 "
+        f"--io-threads-do-reads yes "
+        f"--stream-node-max-bytes 8192 "
+        f"--stream-node-max-entries 1000 "
+        f"--hz 100 "
+        f"--tcp-backlog 2048 "
+        f"--timeout 0 "
+        f"--lazyfree-lazy-eviction yes "
+        f"--lazyfree-lazy-expire yes "
+        f"--lazyfree-lazy-server-del yes "
+        f"--activedefrag yes "
+        f"--requirepass {redis_password}"
     )
-
     logging.info("Starting Redis container on %s:6379: %s", redis_host, redis_cmd)

     # Start Redis container first
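Note: the rewritten command reconfigures Redis as a pure in-memory cache: persistence is disabled (`--appendonly no`, `--save ""`), memory is capped at 30 GB with `allkeys-lru` eviction inside the 32 GB container limit, and I/O threads plus lazy freeing trade background CPU for lower tail latency. A quick way to confirm the live settings, sketched with the redis-py client (assumed installed; `<redis_password>` is a placeholder for the value used above):

```python
import redis

# Connect to the container started by redis_setup_execute (assumes localhost).
r = redis.Redis(host="localhost", port=6379, password="<redis_password>")

# CONFIG GET returns the values set by the command-line flags.
print(r.config_get("maxmemory"))         # {'maxmemory': '32212254720'}, i.e. 30gb
print(r.config_get("maxmemory-policy"))  # {'maxmemory-policy': 'allkeys-lru'}
print(r.config_get("appendonly"))        # {'appendonly': 'no'}
```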
@@ -1455,6 +1538,10 @@ def model_deploy_execute(self: ActionInstance):
         return
     action_id = action_details["_id"]
     model_family = action_details["actionDetails"]["modelFamily"]
+
+    # Get the service ID to track deployments
+    service_id = action_details.get("_idService")
+
     self.setup_action_requirements(
         action_details,
         work_fs,
@@ -1462,17 +1549,27 @@ def model_deploy_execute(self: ActionInstance):
         action_id=action_id,
     )

-    # Get GPU configuration based on requirements and availability
-    # This uses the best-fit algorithm to select the most appropriate GPU(s)
-    use_gpu = self.get_gpu_config(action_details)
+    # Check if this is the first deployment for this service
+    is_first_deployment = ActionInstance.is_first_deployment_for_service(service_id)
+
+    # Get GPU configuration (uses utility function with fail-safe fallback)
+    use_gpu = get_gpu_config_for_deployment(action_details, is_first_deployment)

     logging.info(
-        "Action %s: Model deployment GPU config: %s",
+        "Action %s: Model deployment GPU config: %s (first_deployment=%s)",
         action_id,
-        use_gpu if use_gpu else "CPU-only"
+        use_gpu if use_gpu else "CPU-only",
+        is_first_deployment
     )

-    extra_env_vars = {"INTERNAL_PORT": internal_port}
+    # Get or create TRITON_PORTS (uses utility method)
+    triton_ports = ActionInstance.get_or_create_triton_ports(service_id, self.scaling)
+
+    extra_env_vars = {
+        "INTERNAL_PORT": internal_port,
+        "TRITON_PORTS": triton_ports
+    }
+
     cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, mount_docker_sock=True, action_id=action_id, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_inference", "matrice_analytics"])} python3 deploy.py {self.action_record_id} {external_port}"'
     logging.info("cmd is: %s", cmd)
     self.start(cmd, "deploy_log")
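Note: `extra_env_vars` is handed to `get_base_docker_cmd`, which presumably injects each entry into the container environment, so the deploy container sees `INTERNAL_PORT` and `TRITON_PORTS`. How the container side might read them (a sketch; the actual consumer lives in the `matrice_inference` image, which this diff does not show, and the HTTP/gRPC/metrics split is an assumption based on Triton's usual three endpoints):

```python
import os

internal_port = int(os.environ["INTERNAL_PORT"])
# Three comma-separated ports, stable across redeployments of the same service.
http_port, grpc_port, metrics_port = (
    int(p) for p in os.environ["TRITON_PORTS"].split(",")
)
```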
matrice_compute/instance_utils.py

@@ -600,13 +600,18 @@ def is_allowed_gpu_device(gpu_index: int) -> bool:
     Returns:
         bool: True if GPU is allowed (or no filter is set), False otherwise
     """
-    gpus = os.environ.get("GPUS")
-    if not gpus:
-        # No filter set - all GPUs are allowed
+    gpus = os.environ.get("GPUS", "").strip()
+    # No filter set or empty string - all GPUs are allowed
+    if not gpus or gpus == '""' or gpus == "''":
         return True

     try:
-        allowed_gpus = [int(x) for x in gpus.split(",") if x.strip()]
+        allowed_gpus = [int(x.strip()) for x in gpus.split(",") if x.strip()]
+
+        # If no valid GPUs after parsing, allow all
+        if not allowed_gpus:
+            return True
+
         is_allowed = int(gpu_index) in allowed_gpus

         if not is_allowed:
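Note: the hardened parser now treats an empty, quoted-empty, or unparseable GPUS filter as "allow everything" instead of crashing on `int('""')`. Expected behavior, sketched as assertions against the function above:

```python
import os
from matrice_compute.instance_utils import is_allowed_gpu_device

for value, expected in [
    ("", True),         # unset/empty -> no filter, all GPUs allowed
    ('""', True),       # literal quoted-empty string -> all allowed
    (",,", True),       # nothing parseable -> all allowed
    ("0,1", True),      # GPU 0 is listed
    (" 0 , 1 ", True),  # stray whitespace tolerated (now explicit via strip())
    ("1", False),       # GPU 0 is not in the filter
]:
    os.environ["GPUS"] = value
    assert is_allowed_gpu_device(0) == expected
```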
@@ -727,14 +732,14 @@ def get_gpu_with_sufficient_memory_for_action(
     # For smaller memory requirements, try to fit on a single GPU first
     if required_gpu_memory < 80000:
         logging.debug(
-            "Action %s: Required memory %d MB < 80000 MB - attempting single GPU allocation",
+            "Action %s: Required memory %d MB < 80000 MB - attempting single GPU allocation (selecting GPU with most free memory)",
             action_id,
             required_gpu_memory
         )
         try:
             single_gpu = get_single_gpu_with_sufficient_memory_for_action(action_details)
             logging.info(
-                "Action %s: Successfully allocated single GPU: %s",
+                "Action %s: Successfully allocated single GPU with most free memory: %s",
                 action_id,
                 single_gpu
             )
@@ -800,10 +805,10 @@ def get_single_gpu_with_sufficient_memory_for_action(
     action_details: dict,
 ) -> list:
     """
-    Get single GPU with sufficient memory using best-fit algorithm.
+    Get single GPU with sufficient memory using most-free algorithm.

-    Best-fit selects the GPU with the smallest amount of free memory
-    that still meets the requirements, minimizing fragmentation.
+    Selects the GPU with the MOST free memory that meets the requirements,
+    to balance load across GPUs and prevent any single GPU from being overused.

     Args:
         action_details (dict): Action details
@@ -818,7 +823,7 @@ def get_single_gpu_with_sufficient_memory_for_action(
     required_gpu_memory = get_required_gpu_memory(action_details)

     logging.debug(
-        "Action %s: Finding best-fit single GPU for %d MB",
+        "Action %s: Finding GPU with most free memory for %d MB",
         action_id,
         required_gpu_memory
     )
@@ -862,9 +867,9 @@ def get_single_gpu_with_sufficient_memory_for_action(
     if not memory_free_values:
         raise ValueError("No GPU devices found")

-    # Best-fit algorithm: find GPU with minimum free memory that meets requirement
+    # Most-free algorithm: find GPU with MAXIMUM free memory that meets requirement
     best_fit_gpu = None
-    best_fit_memory = float("inf")
+    best_fit_memory = 0  # Changed from float("inf") to 0

     for i, mem in enumerate(memory_free_values):
         # Check if GPU is in allowed list
@@ -887,12 +892,12 @@ def get_single_gpu_with_sufficient_memory_for_action(
             required_gpu_memory
         )

-        # Best-fit: choose GPU with smallest sufficient memory
-        if mem < best_fit_memory:
+        # Most-free: choose GPU with MOST free memory to balance load
+        if mem > best_fit_memory:  # Changed from < to >
             best_fit_gpu = i
             best_fit_memory = mem
             logging.debug(
-                "Action %s: GPU %d is new best-fit candidate",
+                "Action %s: GPU %d is new best candidate (most free memory)",
                 action_id,
                 i
             )
@@ -907,7 +912,7 @@ def get_single_gpu_with_sufficient_memory_for_action(

     if best_fit_gpu is not None:
         logging.info(
-            "Action %s: Selected best-fit GPU %d with %d MB free (required: %d MB, waste: %d MB)",
+            "Action %s: Selected GPU %d with most free memory: %d MB free (required: %d MB, available: %d MB)",
             action_id,
             best_fit_gpu,
             best_fit_memory,
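Note: taken together, these hunks flip single-GPU selection from best-fit (smallest sufficient free memory, which minimizes fragmentation) to most-free (largest free memory, which spreads load across devices). The rule, condensed to a sketch of the loop's core logic:

```python
def pick_most_free_gpu(memory_free_mb, required_mb, is_allowed=lambda i: True):
    """Return the index of the allowed GPU with the most free memory, or None."""
    best_gpu, best_free = None, 0
    for i, free_mb in enumerate(memory_free_mb):
        if not is_allowed(i) or free_mb < required_mb:
            continue  # filtered out by GPUS, or insufficient memory
        if free_mb > best_free:  # most-free; best-fit compared "<" against inf
            best_gpu, best_free = i, free_mb
    return best_gpu

# With 30 GB and 50 GB free and a 20 GB requirement, best-fit chose GPU 0;
# most-free chooses GPU 1.
assert pick_most_free_gpu([30000, 50000], 20000) == 1
```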
@@ -936,6 +941,120 @@ def get_single_gpu_with_sufficient_memory_for_action(
     raise ValueError(error_msg)


+@log_errors(default_return="", raise_exception=False)
+def get_gpu_config_for_deployment(action_details, is_first_deployment=False):
+    """Get GPU configuration for deployment actions.
+
+    For first deployment of a service, attempts to use all GPUs.
+    For subsequent deployments, uses standard GPU selection (most free memory).
+    Falls back gracefully to standard GPU selection if '--gpus all' is not available.
+
+    Args:
+        action_details (dict): Action details containing GPU requirements
+        is_first_deployment (bool): Whether this is the first deployment for this service
+
+    Returns:
+        str: GPU configuration string ('--gpus all' or '--gpus "device=X"' or '')
+    """
+    action_id = action_details.get("_id", "unknown")
+
+    # Check if GPU is required
+    gpu_required = action_details.get("actionDetails", {}).get("gpuRequired", False)
+    if not gpu_required:
+        logging.info(
+            "Action %s does not require GPU - will run on CPU",
+            action_id
+        )
+        return ""
+
+    # First deployment: try to use all GPUs
+    if is_first_deployment:
+        logging.info(
+            "Action %s: First deployment - attempting to use all GPUs",
+            action_id
+        )
+
+        try:
+            # Check if GPUs are available
+            result = subprocess.run(
+                ["nvidia-smi", "--query-gpu=count", "--format=csv,noheader"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                timeout=5,
+                check=False,
+            )
+
+            if result.returncode == 0 and result.stdout.strip():
+                # GPUs are available, use all of them
+                logging.info(
+                    "Action %s: Using all GPUs for first deployment",
+                    action_id
+                )
+                return '--gpus all'
+            else:
+                logging.warning(
+                    "Action %s: No GPUs detected via nvidia-smi for first deployment, falling back to standard GPU selection",
+                    action_id
+                )
+        except Exception as e:
+            logging.warning(
+                "Action %s: Error checking GPU availability (%s), falling back to standard GPU selection",
+                action_id,
+                str(e)
+            )
+
+    # Fall back to standard GPU selection (most free memory)
+    # This also handles subsequent deployments
+    logging.info(
+        "Action %s: Using standard GPU allocation (most free memory)",
+        action_id
+    )
+
+    required_memory = action_details.get("actionDetails", {}).get(
+        "expectedResources", {}
+    ).get("gpuMemory", 0)
+
+    try:
+        # Get the GPU(s) with most free memory that have sufficient memory
+        gpu_indices = get_gpu_with_sufficient_memory_for_action(
+            action_details=action_details
+        )
+
+        if gpu_indices:
+            gpu_str = ",".join(map(str, gpu_indices))
+            logging.info(
+                "Action %s: Selected GPU device(s): %s (required memory: %d MB)",
+                action_id,
+                gpu_str,
+                required_memory
+            )
+
+            # Return Docker GPU configuration
+            return f'--gpus "device={gpu_str}"'
+        else:
+            logging.warning(
+                "Action %s: No GPUs with sufficient memory found (required: %d MB)",
+                action_id,
+                required_memory
+            )
+            return ""
+
+    except ValueError as e:
+        logging.error(
+            "Action %s: Error selecting GPU - %s",
+            action_id,
+            str(e)
+        )
+        return ""
+    except Exception as e:
+        logging.error(
+            "Action %s: Unexpected error in GPU selection - %s",
+            action_id,
+            str(e)
+        )
+        return ""
+
+
 @log_errors(default_return=(None, None), raise_exception=False)
 def get_decrypted_access_key_pair(
     enc_access_key: str,
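Note: `get_gpu_config_for_deployment` has three possible outcomes: `'--gpus all'` (first deployment, GPUs visible to nvidia-smi), `'--gpus "device=X"'` (repeat deployments and the fallback path, via most-free selection), or `''` (no GPU required, none suitable, or an error swallowed by the `@log_errors` default). A usage sketch with an abbreviated, hypothetical `action_details`:

```python
action_details = {
    "_id": "act-123",        # hypothetical IDs for illustration
    "_idService": "svc-1",
    "actionDetails": {
        "gpuRequired": True,
        "expectedResources": {"gpuMemory": 16000},  # MB
    },
}

# First deployment: '--gpus all' when nvidia-smi reports GPUs, otherwise
# falls through to most-free selection, e.g. '--gpus "device=2"'.
use_gpu = get_gpu_config_for_deployment(action_details, is_first_deployment=True)

# Repeat deployments skip the all-GPUs path; "" means run on CPU, and the
# docker command simply omits the --gpus flag.
use_gpu = get_gpu_config_for_deployment(action_details, is_first_deployment=False)
```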
matrice_compute-0.1.23.dist-info/METADATA → matrice_compute-0.1.25.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: matrice_compute
-Version: 0.1.23
+Version: 0.1.25
 Summary: Common server utilities for Matrice.ai services
 Author-email: "Matrice.ai" <dipendra@matrice.ai>
 License-Expression: MIT
matrice_compute-0.1.23.dist-info/RECORD → matrice_compute-0.1.25.dist-info/RECORD

@@ -1,17 +1,17 @@
 matrice_compute/__init__.py,sha256=ZzQcFsT005VCgq9VZUh565f4upOooEb_FwZ6RgweNZs,597
-matrice_compute/action_instance.py,sha256=kByPDNzmq93RBhVdnhTqGRLj7JleKFnH9hnIoJo966o,66215
+matrice_compute/action_instance.py,sha256=SYUZrfj6dtcgEjeEgCyKlrc2p2o08jlW84Y__V4Aqew,69552
 matrice_compute/actions_manager.py,sha256=5U-xM6tl_Z6x96bi-c7AJM9ru80LqTN8f5Oce8dAu_A,7780
 matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
 matrice_compute/instance_manager.py,sha256=8USyX09ZxLvnVNIrjRogbyUeMCfgWnasuRqYkkVF4tQ,10146
-matrice_compute/instance_utils.py,sha256=tCI_A3L5iohw62acmlXuOJns0DjIkvwN4znlUAIkfbg,37863
+matrice_compute/instance_utils.py,sha256=N4yPDvNukFEEBngR0lEt4x_XT5hur1q0P-spM2xQIlU,42025
 matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
 matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 matrice_compute/resources_tracker.py,sha256=pkdt0aVKx_TpY_Sq---73w9INkDffZZe3mZGlp1EftE,22573
 matrice_compute/scaling.py,sha256=CeT_lxJNkjJamRETG1lWaOtdSr5ySmcaMcqt7-lFRbo,23731
 matrice_compute/shutdown_manager.py,sha256=0MYV_AqygqR9NEntYf7atUC-PbWXyNkm1f-8c2aizgA,13234
 matrice_compute/task_utils.py,sha256=3qIutiQdYPyGRxH9ZwLbqdg8sZcnp6jp08pszWCRFl0,2820
-matrice_compute-0.1.23.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
-matrice_compute-0.1.23.dist-info/METADATA,sha256=7FCjLIs4y-5IfN9P8FRdcSbIZhPbeOC8Cg9ZSCUWr6o,1038
-matrice_compute-0.1.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-matrice_compute-0.1.23.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
-matrice_compute-0.1.23.dist-info/RECORD,,
+matrice_compute-0.1.25.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
+matrice_compute-0.1.25.dist-info/METADATA,sha256=YxPD7gjTuET4wsbq0ywgIw8AmR8U7-EdAuZlIVIramg,1038
+matrice_compute-0.1.25.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+matrice_compute-0.1.25.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
+matrice_compute-0.1.25.dist-info/RECORD,,