mmgp 3.4.9.tar.gz → 3.5.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mmgp
- Version: 3.4.9
+ Version: 3.5.0
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file


  <p align="center">
- <H2>Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.5.0 for the GPU Poor by DeepBeepMeep</H2>
  </p>


@@ -1,6 +1,6 @@

  <p align="center">
- <H2>Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.5.0 for the GPU Poor by DeepBeepMeep</H2>
  </p>


@@ -1,6 +1,6 @@
  [project]
  name = "mmgp"
- version = "3.4.9"
+ version = "3.5.0"
  authors = [
  { name = "deepbeepmeep", email = "deepbeepmeep@yahoo.com" },
  ]
@@ -1,4 +1,4 @@
- # ------------------ Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+ # ------------------ Memory Management 3.5.0 for the GPU Poor by DeepBeepMeep (mmgp)------------------
  #
  # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
  # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -658,7 +658,7 @@ def _welcome():
  if welcome_displayed:
  return
  welcome_displayed = True
- print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.9) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+ print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.0) by DeepBeepMeep ************{ENDC}{UNBOLD}")

  def change_dtype(model, new_dtype, exclude_buffers = False):
  for submodule_name, submodule in model.named_modules():
@@ -1019,33 +1019,18 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora

  if split_linear_modules_map != None:
  new_state_dict = dict()
- targets_A = { "."+k+".lora_A.weight" : k for k in split_linear_modules_map }
- targets_B = { "."+k+".lora_B.weight" : k for k in split_linear_modules_map }
+ suffixes = [(".alpha", -2, False), (".lora_B.weight", -3, True), (".lora_A.weight", -3, False)]
  for module_name, module_data in state_dict.items():
- if any(module_name.endswith(suffix) for suffix in targets_B):
- for suffix, target_module in targets_B.items():
- if module_name.endswith(suffix):
- break
- parent_module_name = module_name[:-len(suffix)]
- map = split_linear_modules_map[target_module]
- mapped_modules = map["mapped_modules"]
- split_sizes = map["split_sizes"]
- sub_data = torch.split(module_data, split_sizes, dim=0)
- for sub_name, subdata, in zip(mapped_modules, sub_data):
- new_module_name = parent_module_name + "." + sub_name + ".lora_B.weight"
- new_state_dict[new_module_name] = subdata
- elif any(module_name.endswith(suffix) for suffix in targets_A):
- for suffix, target_module in targets_A.items():
- if module_name.endswith(suffix):
- break
- parent_module_name = module_name[:-len(suffix)]
- map = split_linear_modules_map[target_module]
- mapped_modules = map["mapped_modules"]
- for sub_name in mapped_modules :
- new_module_name = parent_module_name + "." + sub_name + ".lora_A.weight"
- new_state_dict[new_module_name] = module_data
- else:
- new_state_dict[module_name] = module_data
+ name_parts = module_name.split(".")
+ for suffix, pos, any_split in suffixes:
+ if module_name.endswith(suffix) and (map := split_linear_modules_map.get(name_parts[pos], None )) != None:
+ parent_module_name, module_name = ".".join(name_parts[:pos]), None
+ sub_data = torch.split(module_data, map["split_sizes"], dim=0) if any_split else [None] * len(map["mapped_modules"])
+ for sub_name, subdata in zip(map["mapped_modules"], sub_data):
+ new_module_name = parent_module_name + "." + sub_name + suffix
+ new_state_dict[new_module_name] = subdata if any_split else module_data
+ break
+ if module_name != None: new_state_dict[module_name] = module_data
  state_dict = new_state_dict
  del new_state_dict
  # tied_weights = _extract_tie_weights_from_sd(state_dict, path) # to do
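The rewritten block above drives the key remapping from a single `suffixes` table instead of separate `lora_A`/`lora_B` branches: `.lora_B.weight` tensors are split along dim 0 across the mapped sub-modules, while `.lora_A.weight` and `.alpha` entries are duplicated unchanged. A minimal standalone sketch of the same remapping, with an assumed fused `qkv` projection and illustrative sizes:

```python
# Hedged sketch only: the key names, map contents and sizes below are invented
# for illustration; the real map is supplied by the caller of load_loras_into_model.
import torch

split_linear_modules_map = {
    "qkv": {"mapped_modules": ["q", "k", "v"], "split_sizes": [64, 64, 64]},
}
state_dict = {
    "blocks.0.attn.qkv.lora_A.weight": torch.zeros(16, 320),  # shared by q/k/v
    "blocks.0.attn.qkv.lora_B.weight": torch.zeros(192, 16),  # split 64+64+64 on dim 0
    "blocks.0.attn.qkv.alpha": torch.tensor(16.0),            # shared by q/k/v
}
# (suffix, index of the fused module name inside the dotted key, split along dim 0?)
suffixes = [(".alpha", -2, False), (".lora_B.weight", -3, True), (".lora_A.weight", -3, False)]

new_state_dict = {}
for key, data in state_dict.items():
    parts = key.split(".")
    for suffix, pos, do_split in suffixes:
        if key.endswith(suffix) and (entry := split_linear_modules_map.get(parts[pos])) is not None:
            parent = ".".join(parts[:pos])
            chunks = torch.split(data, entry["split_sizes"], dim=0) if do_split else None
            for i, sub_name in enumerate(entry["mapped_modules"]):
                new_state_dict[f"{parent}.{sub_name}{suffix}"] = chunks[i] if do_split else data
            key = None
            break
    if key is not None:
        new_state_dict[key] = data

print(sorted(new_state_dict))  # q, k and v each end up with lora_A.weight, lora_B.weight and alpha
```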
@@ -1118,7 +1103,9 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  fail = True
  break
  module_shape = module.weight.shape
+ rank = None
  if lora_A != None:
+ rank = lora_A.shape[0]
  if module_shape[1] != v.shape[1]:
  if ignore_model_variations:
  skip = True
@@ -1128,6 +1115,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  fail = True
  break
  elif lora_B != None:
+ rank = lora_B.shape[1]
  if module_shape[0] != v.shape[0]:
  if ignore_model_variations:
  skip = True
@@ -1147,6 +1135,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  fail = True
  break
  elif diff_b != None:
+ rank = diff_b.shape[0]
  if module.bias == None:
  msg = f"Lora '{path}': Lora Basis is defined while it doesnt exist in model '{_get_module_name(model)}'. It is likely this Lora has been made for another version of this model."
  fail = True
@@ -1164,25 +1153,23 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora

  if not check_only:
  loras_module_data = loras_model_data.get(module, None)
- if loras_module_data == None:
- pass
  assert loras_module_data != None
  loras_adapter_data = loras_module_data.get(adapter_name, None)
- lora_A = None if lora_A == None else lora_A.to(module.weight.dtype)
- lora_B = None if lora_B == None else lora_B.to(module.weight.dtype)
- diff_b = None if diff_b == None else diff_b.to(module.weight.dtype)
  if loras_adapter_data == None:
- alpha = lora_alphas.get(k[:-len("lora_X.weight")] + "alpha", 1.)
- loras_adapter_data = [lora_A, lora_B, diff_b, alpha]
+ loras_adapter_data = [None, None, None, 1.]
  loras_module_data[adapter_name] = loras_adapter_data
- elif lora_A != None:
- loras_adapter_data[0] = lora_A
+ if lora_A != None:
+ loras_adapter_data[0] = lora_A.to(module.weight.dtype)
  elif lora_B != None:
- loras_adapter_data[1] = lora_B
+ loras_adapter_data[1] = lora_B.to(module.weight.dtype)
  else:
- loras_adapter_data[2] = diff_b
- lora_A, lora_B, diff, diff_b, v, loras_module_data, loras_adapter_data = None, None, None, None, None, None, None
- lora_alphas = None
+ loras_adapter_data[2] = diff_b.to(module.weight.dtype)
+ if rank != None:
+ alpha_key = k[:-len("lora_X.weight")] + "alpha"
+ alpha = lora_alphas.get(alpha_key, None)
+ alpha = 1. if alpha == None else alpha / rank
+ loras_adapter_data[3] = alpha
+ lora_A = lora_B = diff = diff_b = v = loras_module_data = loras_adapter_data = lora_alphas = None

  if len(invalid_keys) > 0:
  msg = f"Lora '{path}' contains non Lora keys '{trunc(invalid_keys,200)}'"
@@ -1237,7 +1224,7 @@ def unload_loras_from_model(model):
  for _, v in model._loras_model_data.items():
  v.clear()

- model._loras_active_adapters = set()
+ model._loras_active_adapters = []
  model._loras_scaling = dict()
  model._loras_tied_weights = dict()
  model._loras_errors = None
@@ -1248,7 +1235,7 @@ def unload_loras_from_model(model):
  def set_step_no_for_lora(model, step_no):
  model._lora_step_no = step_no

- def activate_loras(model, lora_nos, lora_multi = None ):
+ def activate_loras(model, lora_nos, lora_multi = None):
  if not isinstance(lora_nos, list):
  lora_nos = [lora_nos]
  lora_nos = [str(l) for l in lora_nos]
@@ -1261,7 +1248,7 @@ def activate_loras(model, lora_nos, lora_multi = None ):
  lora_scaling_dict[no] = multi

  model._lora_step_no = 0
- model._loras_active_adapters = set(lora_nos)
+ model._loras_active_adapters = lora_nos
  model._loras_scaling = lora_scaling_dict


@@ -1287,7 +1274,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
  model_path = [model_path]


- if not builtins.all(file_name.endswith(".sft") or file_name.endswith(".safetensors") for file_name in model_path):
+ if not builtins.all(file_name.endswith(".sft") or file_name.endswith(".safetensors") or file_name.endswith(".pt") for file_name in model_path):
  raise Exception("full model path to file expected")

  model_path = [ _get_model(file) for file in model_path]
@@ -1295,9 +1282,11 @@
  raise Exception("Unable to find file")

  verboseLevel = _compute_verbose_level(verboseLevel)
-
- with safetensors2.safe_open(model_path[-1], writable_tensors =writable_tensors) as f:
- metadata = f.metadata()
+ if model_path[-1].endswith(".pt"):
+ metadata = None
+ else:
+ with safetensors2.safe_open(model_path[-1], writable_tensors =writable_tensors) as f:
+ metadata = f.metadata()

  if metadata is None:
  transformer_config = None
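`fast_load_transformers_model` now also accepts plain `.pt` checkpoints; since those carry no safetensors header, the metadata probe is skipped and `metadata` stays `None`, which routes the call into the existing no-metadata branch. A rough equivalent of that probe written against the public `safetensors` API (the package itself uses its bundled `safetensors2` wrapper, so treat this as an approximation of the logic only):

```python
# Sketch with the public safetensors API, not the actual mmgp helper.
from safetensors import safe_open

def read_checkpoint_metadata(checkpoint_path: str):
    if checkpoint_path.endswith(".pt"):
        return None  # torch pickles have no safetensors metadata header
    with safe_open(checkpoint_path, framework="pt") as f:
        return f.metadata()  # may also be None if nothing was embedded
```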
@@ -1737,7 +1726,7 @@ class offload:
  continue
  key = adapter + '_GPU'
  if to_GPU:
- lora_module[key] = [None if item == None else item.cuda(non_blocking=True) for item in lora_data[ :-1] ] + lora_data[ -1:]
+ lora_module[key] = [None if item == None else item.cuda(non_blocking=True) for item in lora_data[ :-1] ] + lora_data[ -1:]
  elif key in lora_module:
  del lora_module[key]

@@ -2015,7 +2004,7 @@ class offload:
  training = False

  dtype = weight.dtype
- if weight.shape[-1] < x.shape[-2] : # sum base weight and lora matrices instead of applying input on each sub lora matrice if input is too large. This will save a lot VRAM and compute
+ if weight.shape[-1] < x.shape[-2]: # sum base weight and lora matrices instead of applying input on each sub lora matrice if input is too large. This will save a lot VRAM and compute
  bias = submodule.bias
  original_bias = True
  if len(active_adapters) > 0:
@@ -2023,12 +2012,8 @@ class offload:
  weight = weight.view(weight.shape) # get a persistent copy of the on the fly dequantized weights
  else:
  weight = weight.clone()
-
-
  for active_adapter in active_adapters:
  data = loras_data.get(active_adapter + '_GPU', None)
- if data == None:
- continue
  lora_A_weight, lora_B_weight, diff_b, alpha = data
  scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
  if lora_A_weight != None:
@@ -2042,9 +2027,8 @@ class offload:
  bias = bias.clone()
  original_bias = False
  bias.add_(diff_b, alpha=scaling)
-
  # base_weight += scaling * lora_B_weight @ lora_A_weight
-
+ break
  if training:
  pass
  # result = torch.nn.functional.linear(dropout(x), base_weight, bias=submodule.bias)
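The branch above folds each active adapter into a (cloned or freshly dequantized) copy of the base weight when the input has more rows than the weight has columns, because one merged matmul is cheaper than pushing a large activation through every `lora_A`/`lora_B` pair. The two paths are numerically equivalent, as this hedged comparison shows (names and sizes are invented, not the actual mmgp forward code):

```python
# Standalone comparison of the two equivalent ways to apply a LoRA pair.
import torch

x = torch.randn(4096, 320)            # many tokens: merging the weights wins
weight = torch.randn(192, 320)
lora_A, lora_B = torch.randn(16, 320), torch.randn(192, 16)
scaling = 0.5

# Path 1: fold the adapter into the weight once, then a single matmul.
merged = weight + scaling * (lora_B @ lora_A)
y_merged = torch.nn.functional.linear(x, merged)

# Path 2: leave the weight untouched and run the activations through A then B.
y_split = torch.nn.functional.linear(x, weight) + scaling * (x @ lora_A.T) @ lora_B.T

print(torch.allclose(y_merged, y_split, atol=1e-3))  # True, up to float32 rounding
```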
@@ -2093,6 +2077,7 @@ class offload:
  if len(loras_data) == 0:
  return old_forward(*args, **kwargs)
  else:
+ # submodule.aaa = submodule_name
  return self._lora_linear_forward(current_model, submodule, loras_data, *args, **kwargs)
  target_fn = lora_linear_forward
  else:
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mmgp
- Version: 3.4.9
+ Version: 3.5.0
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file


  <p align="center">
- <H2>Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.5.0 for the GPU Poor by DeepBeepMeep</H2>
  </p>


6 files without changes