matrice-compute 0.1.23__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these package versions as they appear in their respective public registries.
@@ -285,13 +285,13 @@ class ActionInstance:
285
285
  ).get("gpuMemory", 0)
286
286
 
287
287
  logging.info(
288
- "Action %s requires GPU with %d MB memory - selecting best-fit GPU(s)",
288
+ "Action %s requires GPU with %d MB memory - selecting GPU(s) with most free memory",
289
289
  action_id,
290
290
  required_memory
291
291
  )
292
292
 
293
293
  try:
294
- # Get the best-fit GPU(s) with sufficient memory
294
+ # Get the GPU(s) with most free memory that have sufficient memory
295
295
  gpu_indices = get_gpu_with_sufficient_memory_for_action(
296
296
  action_details=action_details
297
297
  )
@@ -1463,7 +1463,7 @@ def model_deploy_execute(self: ActionInstance):
1463
1463
  )
1464
1464
 
1465
1465
  # Get GPU configuration based on requirements and availability
1466
- # This uses the best-fit algorithm to select the most appropriate GPU(s)
1466
+ # This selects the GPU(s) with the most free memory to balance load
1467
1467
  use_gpu = self.get_gpu_config(action_details)
1468
1468
 
1469
1469
  logging.info(
@@ -600,13 +600,18 @@ def is_allowed_gpu_device(gpu_index: int) -> bool:
600
600
  Returns:
601
601
  bool: True if GPU is allowed (or no filter is set), False otherwise
602
602
  """
603
- gpus = os.environ.get("GPUS")
604
- if not gpus:
605
- # No filter set - all GPUs are allowed
603
+ gpus = os.environ.get("GPUS", "").strip()
604
+ # No filter set or empty string - all GPUs are allowed
605
+ if not gpus or gpus == '""' or gpus == "''":
606
606
  return True
607
607
 
608
608
  try:
609
- allowed_gpus = [int(x) for x in gpus.split(",") if x.strip()]
609
+ allowed_gpus = [int(x.strip()) for x in gpus.split(",") if x.strip()]
610
+
611
+ # If no valid GPUs after parsing, allow all
612
+ if not allowed_gpus:
613
+ return True
614
+
610
615
  is_allowed = int(gpu_index) in allowed_gpus
611
616
 
612
617
  if not is_allowed:
@@ -727,14 +732,14 @@ def get_gpu_with_sufficient_memory_for_action(
727
732
  # For smaller memory requirements, try to fit on a single GPU first
728
733
  if required_gpu_memory < 80000:
729
734
  logging.debug(
730
- "Action %s: Required memory %d MB < 80000 MB - attempting single GPU allocation",
735
+ "Action %s: Required memory %d MB < 80000 MB - attempting single GPU allocation (selecting GPU with most free memory)",
731
736
  action_id,
732
737
  required_gpu_memory
733
738
  )
734
739
  try:
735
740
  single_gpu = get_single_gpu_with_sufficient_memory_for_action(action_details)
736
741
  logging.info(
737
- "Action %s: Successfully allocated single GPU: %s",
742
+ "Action %s: Successfully allocated single GPU with most free memory: %s",
738
743
  action_id,
739
744
  single_gpu
740
745
  )
@@ -800,10 +805,10 @@ def get_single_gpu_with_sufficient_memory_for_action(
800
805
  action_details: dict,
801
806
  ) -> list:
802
807
  """
803
- Get single GPU with sufficient memory using best-fit algorithm.
808
+ Get single GPU with sufficient memory using most-free algorithm.
804
809
 
805
- Best-fit selects the GPU with the smallest amount of free memory
806
- that still meets the requirements, minimizing fragmentation.
810
+ Selects the GPU with the MOST free memory that meets the requirements,
811
+ to balance load across GPUs and prevent any single GPU from being overused.
807
812
 
808
813
  Args:
809
814
  action_details (dict): Action details
@@ -818,7 +823,7 @@ def get_single_gpu_with_sufficient_memory_for_action(
818
823
  required_gpu_memory = get_required_gpu_memory(action_details)
819
824
 
820
825
  logging.debug(
821
- "Action %s: Finding best-fit single GPU for %d MB",
826
+ "Action %s: Finding GPU with most free memory for %d MB",
822
827
  action_id,
823
828
  required_gpu_memory
824
829
  )
@@ -862,9 +867,9 @@ def get_single_gpu_with_sufficient_memory_for_action(
862
867
  if not memory_free_values:
863
868
  raise ValueError("No GPU devices found")
864
869
 
865
- # Best-fit algorithm: find GPU with minimum free memory that meets requirement
870
+ # Most-free algorithm: find GPU with MAXIMUM free memory that meets requirement
866
871
  best_fit_gpu = None
867
- best_fit_memory = float("inf")
872
+ best_fit_memory = 0 # Changed from float("inf") to 0
868
873
 
869
874
  for i, mem in enumerate(memory_free_values):
870
875
  # Check if GPU is in allowed list
@@ -887,12 +892,12 @@ def get_single_gpu_with_sufficient_memory_for_action(
887
892
  required_gpu_memory
888
893
  )
889
894
 
890
- # Best-fit: choose GPU with smallest sufficient memory
891
- if mem < best_fit_memory:
895
+ # Most-free: choose GPU with MOST free memory to balance load
896
+ if mem > best_fit_memory: # Changed from < to >
892
897
  best_fit_gpu = i
893
898
  best_fit_memory = mem
894
899
  logging.debug(
895
- "Action %s: GPU %d is new best-fit candidate",
900
+ "Action %s: GPU %d is new best candidate (most free memory)",
896
901
  action_id,
897
902
  i
898
903
  )
@@ -907,7 +912,7 @@ def get_single_gpu_with_sufficient_memory_for_action(
907
912
 
908
913
  if best_fit_gpu is not None:
909
914
  logging.info(
910
- "Action %s: Selected best-fit GPU %d with %d MB free (required: %d MB, waste: %d MB)",
915
+ "Action %s: Selected GPU %d with most free memory: %d MB free (required: %d MB, available: %d MB)",
911
916
  action_id,
912
917
  best_fit_gpu,
913
918
  best_fit_memory,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.23
3
+ Version: 0.1.24
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -1,17 +1,17 @@
1
1
  matrice_compute/__init__.py,sha256=ZzQcFsT005VCgq9VZUh565f4upOooEb_FwZ6RgweNZs,597
2
- matrice_compute/action_instance.py,sha256=kByPDNzmq93RBhVdnhTqGRLj7JleKFnH9hnIoJo966o,66215
2
+ matrice_compute/action_instance.py,sha256=NK_ZWvNDrLUeOzWwXjxrX7XP-lDHbx5-A0K8ByFpnUg,66241
3
3
  matrice_compute/actions_manager.py,sha256=5U-xM6tl_Z6x96bi-c7AJM9ru80LqTN8f5Oce8dAu_A,7780
4
4
  matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
5
5
  matrice_compute/instance_manager.py,sha256=8USyX09ZxLvnVNIrjRogbyUeMCfgWnasuRqYkkVF4tQ,10146
6
- matrice_compute/instance_utils.py,sha256=tCI_A3L5iohw62acmlXuOJns0DjIkvwN4znlUAIkfbg,37863
6
+ matrice_compute/instance_utils.py,sha256=xDOLo21G7unvlGTpnYQkEWSkyuAsVAcs4scOHy5Oxi4,38204
7
7
  matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
8
8
  matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  matrice_compute/resources_tracker.py,sha256=pkdt0aVKx_TpY_Sq---73w9INkDffZZe3mZGlp1EftE,22573
10
10
  matrice_compute/scaling.py,sha256=CeT_lxJNkjJamRETG1lWaOtdSr5ySmcaMcqt7-lFRbo,23731
11
11
  matrice_compute/shutdown_manager.py,sha256=0MYV_AqygqR9NEntYf7atUC-PbWXyNkm1f-8c2aizgA,13234
12
12
  matrice_compute/task_utils.py,sha256=3qIutiQdYPyGRxH9ZwLbqdg8sZcnp6jp08pszWCRFl0,2820
13
- matrice_compute-0.1.23.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
14
- matrice_compute-0.1.23.dist-info/METADATA,sha256=7FCjLIs4y-5IfN9P8FRdcSbIZhPbeOC8Cg9ZSCUWr6o,1038
15
- matrice_compute-0.1.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
- matrice_compute-0.1.23.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
17
- matrice_compute-0.1.23.dist-info/RECORD,,
13
+ matrice_compute-0.1.24.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
14
+ matrice_compute-0.1.24.dist-info/METADATA,sha256=5fsmPC37r0KPPd6h0qQXnvm0dFqLqboVInQdv7KCr5Y,1038
15
+ matrice_compute-0.1.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
+ matrice_compute-0.1.24.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
17
+ matrice_compute-0.1.24.dist-info/RECORD,,