mmgp 3.2.7.tar.gz → 3.2.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp might be problematic.

PKG-INFO:

@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mmgp
- Version: 3.2.7
+ Version: 3.2.8
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  License: GNU GENERAL PUBLIC LICENSE
@@ -16,7 +16,7 @@ Requires-Dist: psutil


  <p align="center">
- <H2>Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.2.8 for the GPU Poor by DeepBeepMeep</H2>
  </p>

README.md:

@@ -1,6 +1,6 @@

  <p align="center">
- <H2>Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.2.8 for the GPU Poor by DeepBeepMeep</H2>
  </p>

pyproject.toml:

@@ -1,6 +1,6 @@
  [project]
  name = "mmgp"
- version = "3.2.7"
+ version = "3.2.8"
  authors = [
      { name = "deepbeepmeep", email = "deepbeepmeep@yahoo.com" },
  ]
mmgp/offload.py:

@@ -1,4 +1,4 @@
- # ------------------ Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+ # ------------------ Memory Management 3.2.8 for the GPU Poor by DeepBeepMeep (mmgp)------------------
  #
  # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
  # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -575,7 +575,7 @@ def _welcome():
      if welcome_displayed:
          return
      welcome_displayed = True
-     print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+     print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.8) by DeepBeepMeep ************{ENDC}{UNBOLD}")

  def _extract_num_from_str(num_in_str):
      size = len(num_in_str)
@@ -910,6 +910,9 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
          skip = False
          state_dict = safetensors2.torch_load_file(path)

+
+
+
          if preprocess_sd != None:
              state_dict = preprocess_sd(state_dict)

@@ -947,42 +950,34 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
          # tied_weights = _extract_tie_weights_from_sd(state_dict, path) # to do

          clean_up = False
-         keys = list(state_dict.keys())
-         if len(keys) == 0:
+         first_key = next(iter(state_dict), None)
+         if first_key == None:
              msg = f"Empty Lora '{path}'"
              error_msg = append(error_msg, msg)
              fail = True

          if not fail:
-             network_alphas = {}
-             for k in keys:
-                 if "alpha" in k:
-                     alpha_value = state_dict.pop(k)
-                     if not ( (torch.is_tensor(alpha_value) and torch.is_floating_point(alpha_value)) or isinstance(
-                         alpha_value, float
-                     )):
-                         network_alphas[k] = torch.tensor( float(alpha_value.item() ) )
-
-             pos = keys[0].find(".")
-             prefix = keys[0][0:pos]
+             pos = first_key.find(".")
+             prefix = first_key[0:pos]
              if prefix not in ["diffusion_model", "transformer"]:
                  msg = f"No compatible weight was found in Lora file '{path}'. Please check that it is compatible with the Diffusers format."
                  error_msg = append(error_msg, msg)
                  fail = True

          if not fail:
+
              state_dict = { k[ len(prefix) + 1:]: v for k, v in state_dict.items() if k.startswith(prefix) }
-             rank = {}
              clean_up = True

-             # for key, val in state_dict.items():
-             #     if "lora_B" in key:
-             #         rank[key] = val.shape[1]
+             keys = list(state_dict.keys())

-             # if network_alphas is not None and len(network_alphas) >= 1:
-             #     alpha_keys = [k for k in network_alphas.keys() if k.startswith(prefix) and k.split(".")[0] == prefix]
-             #     network_alphas = {k.replace(f"{prefix}.", ""): v for k, v in network_alphas.items() if k in alpha_keys}
-             network_alphas = None
+             lora_alphas = {}
+             for k in keys:
+                 if "alpha" in k:
+                     alpha_value = state_dict.pop(k)
+                     if torch.is_tensor(alpha_value):
+                         alpha_value = float(alpha_value.item())
+                     lora_alphas[k] = alpha_value

          invalid_keys = []
          unexpected_keys = []
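
In 3.2.7 the alpha entries were collected into network_alphas before the prefix was stripped and then discarded (network_alphas = None); 3.2.8 strips the "diffusion_model"/"transformer" prefix first, converts each alpha to a plain Python float, and keeps it in lora_alphas for later use. A minimal sketch of the new extraction on a toy state dict (the key names and shapes below are invented for illustration):

import torch

# Toy LoRA state dict, as it looks after the prefix has been stripped.
state_dict = {
    "blocks.0.attn.q.lora_A.weight": torch.randn(4, 64),
    "blocks.0.attn.q.lora_B.weight": torch.randn(64, 4),
    "blocks.0.attn.q.alpha": torch.tensor(8.0),
}

lora_alphas = {}
for k in list(state_dict.keys()):
    if "alpha" in k:
        alpha_value = state_dict.pop(k)      # alpha entries are removed from the weight dict
        if torch.is_tensor(alpha_value):
            alpha_value = float(alpha_value.item())
        lora_alphas[k] = alpha_value

print(lora_alphas)   # {'blocks.0.attn.q.alpha': 8.0}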
@@ -1037,14 +1032,18 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
                  loras_module_data = dict()
                  loras_model_data[module] = loras_module_data
              loras_adapter_data = loras_module_data.get(adapter_name, None)
+             lora_A = None if lora_A == None else lora_A.to(torch.bfloat16)
+             lora_B = None if lora_B == None else lora_B.to(torch.bfloat16)
              if loras_adapter_data == None:
-                 loras_adapter_data = [lora_A, lora_B]
+                 alpha = lora_alphas.get(k[:-len("lora_X.weight")] + "alpha", 1.)
+                 loras_adapter_data = [lora_A, lora_B, alpha]
                  loras_module_data[adapter_name] = loras_adapter_data
              elif lora_A != None:
                  loras_adapter_data[0] = lora_A
              else:
                  loras_adapter_data[1] = lora_B
              lora_A, lora_B, v, loras_module_data, loras_adapter_data = None, None, None, None, None
+         lora_alphas = None

          if len(invalid_keys) > 0:
              msg = "Lora '{path}' contains non Lora keys '{trunc(invalid_keys,200)}'"
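
The alpha for a given layer is looked up by slicing the trailing "lora_A.weight" / "lora_B.weight" suffix off the weight key ("lora_X.weight" is just a placeholder of the same length) and appending "alpha"; files that define no alpha fall back to 1.0, which reproduces the old scaling. A worked example with a hypothetical key:

k = "blocks.0.attn.q.lora_A.weight"
alpha_key = k[:-len("lora_X.weight")] + "alpha"   # "blocks.0.attn.q.alpha"
alpha = lora_alphas.get(alpha_key, 1.)            # 1.0 when the file ships no alpha

Note that this hunk also casts lora_A / lora_B to torch.bfloat16 at load time, so the stored adapter data becomes a [lora_A, lora_B, alpha] triple rather than a pair.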
@@ -1548,10 +1547,10 @@ class offload:
          lora_data = lora_module.get(adapter, None)
          if lora_data == None:
              continue
-         lora_A, lora_B = lora_data
+         lora_A, lora_B, alpha = lora_data
          key = adapter + '_GPU'
          if to_GPU:
-             lora_module[key] = [lora_A.cuda(), lora_B.cuda()]
+             lora_module[key] = [lora_A.cuda(), lora_B.cuda(), alpha]
          elif key in lora_module:
              del lora_module[key]

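Because alpha is kept as a plain float rather than a tensor, moving an adapter to the GPU still only copies the two weight matrices; the scalar rides along unchanged. A sketch following the dict layout in the hunk above:

lora_A, lora_B, alpha = lora_module[adapter]
lora_module[adapter + '_GPU'] = [lora_A.cuda(), lora_B.cuda(), alpha]   # alpha is a float, no device copy needed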
@@ -1801,8 +1800,8 @@ class offload:
          data = loras_data.get(active_adapter + '_GPU', None)
          if data == None:
              continue
-         lora_A_weight, lora_B_weight = data
-         scaling = get_scaling(active_adapter)
+         lora_A_weight, lora_B_weight, alpha = data
+         scaling = get_scaling(active_adapter) * alpha
          weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
          # base_weight += scaling * lora_B_weight @ lora_A_weight

@@ -1822,9 +1821,9 @@ class offload:
          data = loras_data.get(active_adapter + '_GPU', None)
          if data == None:
              continue
-         lora_A, lora_B = data
+         lora_A, lora_B, alpha = data
          # dropout = self.lora_dropout[active_adapter]
-         scaling = get_scaling(active_adapter)
+         scaling = get_scaling(active_adapter) * alpha
          x = x.to(lora_A.dtype)

          if training:
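
On the unmerged path the same combined scaling is applied to the low-rank activation delta instead of being folded into the weight. A minimal sketch of the LoRA forward computation this corresponds to (the function name and argument layout here are my own, not mmgp's API):

import torch
import torch.nn.functional as F

def lora_linear(x, base_weight, lora_A, lora_B, multiplier, alpha):
    # y = x @ W.T + s * ((x @ A.T) @ B.T), with s = multiplier * alpha
    scaling = multiplier * alpha
    y = F.linear(x, base_weight)
    y = y + scaling * F.linear(F.linear(x.to(lora_A.dtype), lora_A), lora_B)
    return y

x = torch.randn(2, 32)
W, A, B = torch.randn(64, 32), torch.randn(4, 32), torch.randn(64, 4)
y = lora_linear(x, W, A, B, multiplier=1.0, alpha=8.0)   # shape (2, 64)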
@@ -2248,7 +2247,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, l
              print(f"Model '{model_id}' already pinned to reserved memory")
          else:
              _pin_to_memory(current_model, model_id, partialPinning= partialPinning, pinnedPEFTLora = pinnedPEFTLora, verboseLevel=verboseLevel)
-
+
          current_budget = model_budgets[model_id]
          cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
          self.loaded_blocks[model_id] = None

(whitespace-only change)
PKG-INFO (second copy; in an sdist this is typically mmgp.egg-info/PKG-INFO):

@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mmgp
- Version: 3.2.7
+ Version: 3.2.8
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  License: GNU GENERAL PUBLIC LICENSE
@@ -16,7 +16,7 @@ Requires-Dist: psutil


  <p align="center">
- <H2>Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.2.8 for the GPU Poor by DeepBeepMeep</H2>
  </p>

6 files without changes.