mmgp 3.2.7__py3-none-any.whl → 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


mmgp/offload.py CHANGED
@@ -1,4 +1,4 @@
- # ------------------ Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+ # ------------------ Memory Management 3.3.0 for the GPU Poor by DeepBeepMeep (mmgp)------------------
  #
  # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
  # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -575,7 +575,7 @@ def _welcome():
  if welcome_displayed:
  return
  welcome_displayed = True
- print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+ print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.8) by DeepBeepMeep ************{ENDC}{UNBOLD}")

  def _extract_num_from_str(num_in_str):
  size = len(num_in_str)
@@ -882,10 +882,11 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  return source + CrLf + text

  def trunc(text, sz):
+ text = str(text)
  if len(text) < sz:
- return str(text)
+ return text
  else:
- return str(text)[0:sz] + '...'
+ return text[0:sz] + '...'

  if not isinstance(lora_path, list):
  lora_path = [lora_path]
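
For reference, the patched helper reassembles to the standalone function below. Moving the str() coercion up front means the length test and the slice both operate on the string form of the input (trunc is called with a list of keys further down), instead of measuring the raw object and stringifying it afterwards.

# Reassembled from the hunk above.
def trunc(text, sz):
    text = str(text)
    if len(text) < sz:
        return text
    else:
        return text[0:sz] + '...'
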
@@ -910,6 +911,9 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  skip = False
  state_dict = safetensors2.torch_load_file(path)

+
+
+
  if preprocess_sd != None:
  state_dict = preprocess_sd(state_dict)

@@ -947,42 +951,34 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  # tied_weights = _extract_tie_weights_from_sd(state_dict, path) # to do

  clean_up = False
- keys = list(state_dict.keys())
- if len(keys) == 0:
+ first_key = next(iter(state_dict), None)
+ if first_key == None:
  msg = f"Empty Lora '{path}'"
  error_msg = append(error_msg, msg)
  fail = True

  if not fail:
- network_alphas = {}
- for k in keys:
- if "alpha" in k:
- alpha_value = state_dict.pop(k)
- if not ( (torch.is_tensor(alpha_value) and torch.is_floating_point(alpha_value)) or isinstance(
- alpha_value, float
- )):
- network_alphas[k] = torch.tensor( float(alpha_value.item() ) )
-
- pos = keys[0].find(".")
- prefix = keys[0][0:pos]
+ pos = first_key.find(".")
+ prefix = first_key[0:pos]
  if prefix not in ["diffusion_model", "transformer"]:
  msg = f"No compatible weight was found in Lora file '{path}'. Please check that it is compatible with the Diffusers format."
  error_msg = append(error_msg, msg)
  fail = True

  if not fail:
+
  state_dict = { k[ len(prefix) + 1:]: v for k, v in state_dict.items() if k.startswith(prefix) }
- rank = {}
  clean_up = True

- # for key, val in state_dict.items():
- # if "lora_B" in key:
- # rank[key] = val.shape[1]
+ keys = list(state_dict.keys())

- # if network_alphas is not None and len(network_alphas) >= 1:
- # alpha_keys = [k for k in network_alphas.keys() if k.startswith(prefix) and k.split(".")[0] == prefix]
- # network_alphas = {k.replace(f"{prefix}.", ""): v for k, v in network_alphas.items() if k in alpha_keys}
- network_alphas = None
+ lora_alphas = {}
+ for k in keys:
+ if "alpha" in k:
+ alpha_value = state_dict.pop(k)
+ if torch.is_tensor(alpha_value):
+ alpha_value = float(alpha_value.item())
+ lora_alphas[k] = alpha_value

  invalid_keys = []
  unexpected_keys = []
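
A minimal sketch of the new alpha handling in isolation; the state dict below is hypothetical and its key names and shapes are illustrative, not taken from a real checkpoint:

import torch

# Hypothetical Diffusers-style LoRA state dict (illustrative names/shapes).
state_dict = {
    "blocks.0.attn.lora_A.weight": torch.randn(16, 320),
    "blocks.0.attn.lora_B.weight": torch.randn(320, 16),
    "blocks.0.attn.alpha": torch.tensor(8.0),
}

# Same logic as the hunk: pop every "...alpha" entry out of the state dict
# and keep it as a plain float, keyed by its original name.
lora_alphas = {}
for k in list(state_dict.keys()):
    if "alpha" in k:
        alpha_value = state_dict.pop(k)
        if torch.is_tensor(alpha_value):
            alpha_value = float(alpha_value.item())
        lora_alphas[k] = alpha_value

print(lora_alphas)  # {'blocks.0.attn.alpha': 8.0}
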
@@ -1037,14 +1033,18 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  loras_module_data = dict()
  loras_model_data[module] = loras_module_data
  loras_adapter_data = loras_module_data.get(adapter_name, None)
+ lora_A = None if lora_A == None else lora_A.to(torch.bfloat16)
+ lora_B = None if lora_B == None else lora_B.to(torch.bfloat16)
  if loras_adapter_data == None:
- loras_adapter_data = [lora_A, lora_B]
+ alpha = lora_alphas.get(k[:-len("lora_X.weight")] + "alpha", 1.)
+ loras_adapter_data = [lora_A, lora_B, alpha]
  loras_module_data[adapter_name] = loras_adapter_data
  elif lora_A != None:
  loras_adapter_data[0] = lora_A
  else:
  loras_adapter_data[1] = lora_B
  lora_A, lora_B, v, loras_module_data, loras_adapter_data = None, None, None, None, None
+ lora_alphas = None

  if len(invalid_keys) > 0:
  msg = "Lora '{path}' contains non Lora keys '{trunc(invalid_keys,200)}'"
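
The alpha lookup above works because "lora_A.weight" and "lora_B.weight" are exactly as long as the "lora_X.weight" placeholder, so the slice strips either suffix before "alpha" is appended; entries without a stored alpha fall back to 1. A quick check with an illustrative key:

# len("lora_X.weight") == len("lora_A.weight") == len("lora_B.weight") == 13
k = "blocks.0.attn.lora_A.weight"  # illustrative key name
alpha_key = k[:-len("lora_X.weight")] + "alpha"
print(alpha_key)  # blocks.0.attn.alpha
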
@@ -1409,7 +1409,9 @@ def extract_models(obj = None, prefix = None):
  elif prefix[ -1:] != "/":
  prefix + "/"

- for name in dir(obj):
+ for name in dir(obj):
+ if name in ["_execution_device"]:
+ continue
  element = getattr(obj,name)
  if name in ("pipeline", "pipe"):
  pipeline = element
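
The skip guards the dir() scan: _execution_device on a Diffusers pipeline is a computed property, so getattr() on it runs code (and can fail early in setup) rather than returning a stored submodule. A hedged sketch of the guarded scan, with a hypothetical helper name:

# Hypothetical standalone version of the guarded attribute scan.
SKIPPED_ATTRIBUTES = ["_execution_device"]  # properties that execute code under getattr()

def iter_elements(obj):
    for name in dir(obj):
        if name in SKIPPED_ATTRIBUTES:
            continue
        yield name, getattr(obj, name)
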
@@ -1548,10 +1550,10 @@ class offload:
  lora_data = lora_module.get(adapter, None)
  if lora_data == None:
  continue
- lora_A, lora_B = lora_data
+ lora_A, lora_B, alpha = lora_data
  key = adapter + '_GPU'
  if to_GPU:
- lora_module[key] = [lora_A.cuda(), lora_B.cuda()]
+ lora_module[key] = [lora_A.cuda(non_blocking=True), lora_B.cuda(non_blocking=True), alpha]
  elif key in lora_module:
  del lora_module[key]

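
The switch to cuda(non_blocking=True) lets the host-to-device copies overlap with compute when the source tensors live in pinned (page-locked) host memory, which is what the pinnedPEFTLora path arranges; with ordinary pageable memory the call degrades to a synchronous copy. A minimal illustration:

import torch

# non_blocking=True only pays off when the host tensor is pinned; otherwise
# the copy silently falls back to a blocking transfer.
if torch.cuda.is_available():
    lora_A = torch.randn(16, 320).pin_memory()
    lora_A_gpu = lora_A.cuda(non_blocking=True)  # asynchronous H2D copy
    torch.cuda.synchronize()  # wait for the copy before depending on it elsewhere
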
@@ -1595,8 +1597,8 @@ class offload:
  lora_data = loras_model_data.get(parent_module, None)
  if lora_data != None:
  loras_modules[parent_module]= lora_data
- if len(loras_modules) > 0:
- self._move_loras(loras_active_adapters, loras_modules, True)
+ if len(loras_modules) > 0:
+ self._move_loras(loras_active_adapters, loras_modules, True)

  loaded_block = self.loaded_blocks[model_id]

@@ -1801,8 +1803,8 @@ class offload:
  data = loras_data.get(active_adapter + '_GPU', None)
  if data == None:
  continue
- lora_A_weight, lora_B_weight = data
- scaling = get_scaling(active_adapter)
+ lora_A_weight, lora_B_weight, alpha = data
+ scaling = get_scaling(active_adapter) * alpha
  weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
  # base_weight += scaling * lora_B_weight @ lora_A_weight

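
The merge path folds each adapter into the base weight with a single fused in-place addmm_, now scaled by the stored alpha: weight += scaling * (lora_B @ lora_A) with scaling = get_scaling(active_adapter) * alpha. A self-contained sketch with illustrative shapes and values:

import torch

weight = torch.zeros(320, 320)   # base weight (illustrative shape)
lora_B = torch.randn(320, 16)    # rank-16 adapter factors (illustrative)
lora_A = torch.randn(16, 320)
scaling = 1.0 * 8.0              # get_scaling(adapter) * alpha, illustrative values

# Fused in-place update: weight += scaling * (lora_B @ lora_A)
weight.addmm_(lora_B, lora_A, alpha=scaling)
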
@@ -1822,9 +1824,9 @@ class offload:
  data = loras_data.get(active_adapter + '_GPU', None)
  if data == None:
  continue
- lora_A, lora_B = data
+ lora_A, lora_B, alpha = data
  # dropout = self.lora_dropout[active_adapter]
- scaling = get_scaling(active_adapter)
+ scaling = get_scaling(active_adapter) * alpha
  x = x.to(lora_A.dtype)

  if training:
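
On the unmerged path the same combined factor scales a low-rank update computed on the activations instead of the weights. A sketch of that computation under the same illustrative shapes, with dropout left out as in the commented line above:

import torch

x = torch.randn(2, 320)                              # batch of activations (illustrative)
lora_A = torch.randn(16, 320, dtype=torch.bfloat16)  # adapters now stored in bfloat16
lora_B = torch.randn(320, 16, dtype=torch.bfloat16)
scaling = 1.0 * 8.0                                  # get_scaling(adapter) * alpha

x = x.to(lora_A.dtype)                               # match adapter dtype, as in the hunk
update = (x @ lora_A.T @ lora_B.T) * scaling         # delta added to the base layer output
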
@@ -2248,7 +2250,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, l
  print(f"Model '{model_id}' already pinned to reserved memory")
  else:
  _pin_to_memory(current_model, model_id, partialPinning= partialPinning, pinnedPEFTLora = pinnedPEFTLora, verboseLevel=verboseLevel)
-
+
  current_budget = model_budgets[model_id]
  cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
  self.loaded_blocks[model_id] = None
mmgp-3.2.7.dist-info/METADATA → mmgp-3.3.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: mmgp
- Version: 3.2.7
+ Version: 3.3.0
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  License: GNU GENERAL PUBLIC LICENSE
@@ -13,10 +13,11 @@ Requires-Dist: optimum-quanto
  Requires-Dist: accelerate
  Requires-Dist: safetensors
  Requires-Dist: psutil
+ Dynamic: license-file


  <p align="center">
- <H2>Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.3.0 for the GPU Poor by DeepBeepMeep</H2>
  </p>


mmgp-3.3.0.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+ __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+ mmgp/offload.py,sha256=xdlYbB8nKUywAAMPcfCzJmCxYHvBB5vcZgv2wEQTtbE,105329
+ mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
+ mmgp-3.3.0.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+ mmgp-3.3.0.dist-info/METADATA,sha256=33eB_YmC6PciTkzi_Z_gsWWzoz6RJgyLbEItFatVghk,16153
+ mmgp-3.3.0.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+ mmgp-3.3.0.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+ mmgp-3.3.0.dist-info/RECORD,,
mmgp-3.2.7.dist-info/WHEEL → mmgp-3.3.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (76.0.0)
+ Generator: setuptools (77.0.3)
  Root-Is-Purelib: true
  Tag: py3-none-any

mmgp-3.2.7.dist-info/RECORD DELETED
@@ -1,9 +0,0 @@
- __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
- mmgp/offload.py,sha256=6qJrxM3EPqUHC04njZetVY2sr2x9DQwh13CZIM5oLIA,105417
- mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
- mmgp-3.2.7.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
- mmgp-3.2.7.dist-info/METADATA,sha256=zu_MxYB3j6sYNqQShyKnNwJkv0_j-fO6qOHoO8PUUfY,16131
- mmgp-3.2.7.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
- mmgp-3.2.7.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
- mmgp-3.2.7.dist-info/RECORD,,