mmgp 3.2.3.tar.gz → 3.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.2.3
+Version: 3.2.4
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Requires-Dist: peft
 
 
 <p align="center">
-<H2>Memory Management 3.2.3 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.2.4 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -1,6 +1,6 @@
 
 <p align="center">
-<H2>Memory Management 3.2.3 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.2.4 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -1,6 +1,6 @@
 [project]
 name = "mmgp"
-version = "3.2.3"
+version = "3.2.4"
 authors = [
   { name = "deepbeepmeep", email = "deepbeepmeep@yahoo.com" },
 ]
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.2.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.2.4 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -479,7 +479,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.3) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.4) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -858,7 +858,7 @@ def _lora_linear_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor
         result = result.to(torch_result_dtype)
     return result
 
-def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None,verboseLevel = -1,):
+def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None, preprocess_sd = None, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)
 
     if inject_adapter_in_model == None or set_weights_and_activate_adapters == None or get_peft_kwargs == None:
@@ -877,7 +877,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         adapter_name = str(i)
 
         state_dict = safetensors2.torch_load_file(path)
-
+        if preprocess_sd != None:
+            state_dict = preprocess_sd(state_dict)
 
         if split_linear_modules_map != None:
             new_state_dict = {}
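
The two hunks above introduce a preprocess_sd callback on load_loras_into_model: when supplied, each LoRA state dict is passed through it right after being read from disk, before any key splitting or adapter injection. A minimal sketch of a call, assuming the function stays reachable as offload.load_loras_into_model as in previous releases (the renaming helper and file name below are hypothetical):

    from mmgp import offload

    def strip_trainer_prefix(state_dict):
        # Hypothetical preprocessing: drop a "diffusion_model." prefix
        # that some third-party LoRA trainers prepend to every key.
        return {k.removeprefix("diffusion_model."): v for k, v in state_dict.items()}

    offload.load_loras_into_model(
        model,                               # an already-instantiated torch module
        ["my_lora.safetensors"],             # LoRA file(s) to load
        lora_multi=[1.0],                    # per-LoRA multiplier
        preprocess_sd=strip_trainer_prefix,  # new in 3.2.4
    )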
@@ -977,7 +978,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         # Check only for unexpected keys.
         unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
         if unexpected_keys:
-            pass
+            raise Exception(f"Lora '{path}' contains invalid keys '{unexpected_keys}'")
+
         if verboseLevel >=1:
             print(f"Lora '{path}' was loaded in model '{_get_module_name(model)}'")
     if activate_all_loras:
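
Note the behaviour change in this hunk: unexpected LoRA keys used to be silently ignored (pass) and now abort the load. Callers that probe LoRA files of unknown provenance may therefore want to guard the call; an illustrative sketch only:

    try:
        offload.load_loras_into_model(model, ["unverified_lora.safetensors"])
    except Exception as err:
        # As of 3.2.4, a LoRA whose keys don't match the target model is
        # rejected with an exception instead of loading partially.
        print(f"LoRA rejected: {err}")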
@@ -1015,7 +1017,7 @@ def move_loras_to_device(model, device="cpu" ):
         if ".lora_" in k:
             m.to(device)
 
-def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, verboseLevel = -1):
+def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, modelPrefix = None, verboseLevel = -1):
     """
     quick version of .LoadfromPretrained of the transformers library
     used to build a model and load the corresponding weights (quantized or not)
@@ -1096,13 +1098,13 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
 
     model._config = transformer_config
 
-    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, verboseLevel=verboseLevel )
+    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, verboseLevel=verboseLevel )
 
     return model
 
 
 
-def load_model_data(model, file_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, verboseLevel = -1):
+def load_model_data(model, file_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, verboseLevel = -1):
     """
     Load a model, detect if it has been previously quantized using quanto and do the extra setup if necessary
     """
@@ -1113,6 +1115,26 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
     verboseLevel = _compute_verbose_level(verboseLevel)
 
     model = _remove_model_wrapper(model)
+
+    def filter_state_dict(state_dict, base_model_prefix):
+        new_state_dict= {}
+        start = -1
+        for k,v in state_dict.items():
+            if k.startswith(base_model_prefix):
+
+                new_start = len(base_model_prefix)
+            else:
+                pos = k.find("." + base_model_prefix)
+                if pos < 0:
+                    continue
+                new_start = pos + len(base_model_prefix) +1
+            if start != -1 and start != new_start:
+                new_state_dict = state_dict
+                break
+            start = new_start
+            new_state_dict[k[ start:]] = v
+        return new_state_dict
+
     if not (".safetensors" in file_path or ".sft" in file_path):
         if pinToMemory:
             raise Exception("Pinning to memory while loading only supported for safe tensors files")
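
The new filter_state_dict helper strips base_model_prefix from every key, matching the prefix either at the start of the key or after an enclosing wrapper segment ("." + prefix). Keys that do not contain the prefix are dropped, and if two keys would be cut at different offsets the helper bails out and returns the original dict unchanged. Lifted out of load_model_data purely for illustration, it behaves like this on made-up keys:

    sd = {
        "transformer.blocks.0.weight": 0,
        "transformer.blocks.1.weight": 1,
        "vae.decoder.weight": 2,  # no "transformer." prefix: dropped
    }
    # filter_state_dict(sd, "transformer.") would return
    # {"blocks.0.weight": 0, "blocks.1.weight": 1}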
@@ -1151,6 +1173,11 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
         quantization_map = json.load(f)
 
 
+    # deal if we are trying to load just a sub part of a larger model
+    if modelPrefix != None:
+        base_model_prefix = modelPrefix + "."
+        state_dict = filter_state_dict(state_dict,base_model_prefix)
+        quantization_map = filter_state_dict(quantization_map,base_model_prefix)
 
     if quantization_map is None :
         if "quanto" in file_path and not do_quantize:
@@ -1160,32 +1187,12 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
 
     missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
     if len(missing_keys) > 0 :
-        # if there is a key mismatch maybe we forgot to remove some prefix or we are trying to load just a sub part of a larger model
-        if hasattr(model, "base_model_prefix"):
-            base_model_prefix = model.base_model_prefix + "."
-        else:
-            for k,v in state_dict.items():
-                if k.endswith(missing_keys[0]):
-                    base_model_prefix = k[:-len(missing_keys[0])]
-                    break
-
-        new_state_dict= {}
-        start = -1
+        # if there is a key mismatch maybe we forgot to remove some prefix
         for k,v in state_dict.items():
-            if k.startswith(base_model_prefix):
-                new_start = len(base_model_prefix)
-            else:
-                pos = k.find("." + base_model_prefix)
-                if pos < 0:
-                    continue
-                new_start = pos + len(base_model_prefix) +1
-            if start != -1 and start != new_start:
-                new_state_dict = state_dict
+            if k.endswith(missing_keys[0]):
+                base_model_prefix = k[:-len(missing_keys[0])]
                 break
-            start = new_start
-            new_state_dict[k[ start:]] = v
-        state_dict = new_state_dict
-        del new_state_dict
+        state_dict = filter_state_dict(state_dict,base_model_prefix)
         missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
     del state_dict
 
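
This refactor deduplicates the prefix-stripping logic: the missing-keys fallback now infers the prefix from the first missing key and delegates to filter_state_dict, and the old hasattr(model, "base_model_prefix") shortcut is gone. The inference step itself works like this (keys are made up):

    missing_key = "blocks.0.attn.weight"  # first key reported missing by load_state_dict
    file_key = "model.diffusion_model.blocks.0.attn.weight"  # matching key on disk
    if file_key.endswith(missing_key):
        prefix = file_key[:-len(missing_key)]  # -> "model.diffusion_model."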
@@ -1354,6 +1361,8 @@ class offload:
 
     def add_module_to_blocks(self, model_id, blocks_name, submodule, prev_block_name, submodule_name):
 
+        if blocks_name!=None and ".lora_" in blocks_name:
+            blocks_name = None
         entry_name = model_id if blocks_name is None else model_id + "/" + blocks_name
         if entry_name in self.blocks_of_modules:
             blocks_params = self.blocks_of_modules[entry_name]
@@ -1372,7 +1381,6 @@
         lora_name = None
         if self.lora_parents.get(submodule, None) != None:
             lora_name = str(submodule_name[ submodule_name.rfind(".") + 1: ] )
-
         for k,p in submodule.named_parameters(recurse=False):
             param_size = 0
             ref = _get_tensor_ref(p)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.2.3
+Version: 3.2.4
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Requires-Dist: peft
 
 
 <p align="center">
-<H2>Memory Management 3.2.3 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.2.4 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
6 files without changes