mmgp-3.2.0.tar.gz → mmgp-3.2.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.2.0
+Version: 3.2.2
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Requires-Dist: peft
 
 
 <p align="center">
-<H2>Memory Management 3.2.0 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.2.1 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -44,6 +44,9 @@ Each profile may use a combination of the following:
 
 ## Sample applications that use mmgp
 It is recommended to have a look at these applications to see how mmgp was implemented in each of them:
+- Wan2GP: https://github.com/deepbeepmeep/Wan2GP :\
+An excellent text to video and image to video generator by Alibaba
+
 - Hunyuan3D-2GP: https://github.com/deepbeepmeep/Hunyuan3D-2GP :\
 A great image to 3D and text to 3D tool by the Tencent team. Thanks to mmgp it can run with less than 6 GB of VRAM
 
@@ -1,6 +1,6 @@
 
 <p align="center">
-<H2>Memory Management 3.2.0 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.2.1 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -27,6 +27,9 @@ Each profile may use a combination of the following:
 
 ## Sample applications that use mmgp
 It is recommended to have a look at these applications to see how mmgp was implemented in each of them:
+- Wan2GP: https://github.com/deepbeepmeep/Wan2GP :\
+An excellent text to video and image to video generator by Alibaba
+
 - Hunyuan3D-2GP: https://github.com/deepbeepmeep/Hunyuan3D-2GP :\
 A great image to 3D and text to 3D tool by the Tencent team. Thanks to mmgp it can run with less than 6 GB of VRAM
 
@@ -1,6 +1,6 @@
 [project]
 name = "mmgp"
-version = "3.2.0"
+version = "3.2.2"
 authors = [
   { name = "deepbeepmeep", email = "deepbeepmeep@yahoo.com" },
 ]
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.2.0 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.2.1 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -479,7 +479,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.0) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.1) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -603,8 +603,6 @@ def _quantize(model_to_quantize, weights=qint8, verboseLevel = 1, threshold = 2*
     tied_weights= {}
 
     for submodule_name, submodule in model_to_quantize.named_modules():
-        if "embed_token" in submodule_name:
-            pass
         if isinstance(submodule, QModuleMixin):
            if verboseLevel>=1:
                 print("No quantization to do as model is already quantized")
@@ -802,7 +800,7 @@ def _lora_linear_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor
             scaling = get_scaling(active_adapter)
             lora_A_weight = lora_A.weight
             lora_B_weight = lora_B.weight
-            if new_weights:
+            if new_weights:
                 base_weight = torch.addmm(base_weight, lora_B_weight, lora_A_weight, alpha= scaling )
                 # base_weight = base_weight + scaling * lora_B_weight @ lora_A_weight
             else:
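The visible change in this hunk is whitespace only (the `-`/`+` pair of `if new_weights:` differs in trailing spaces). The interesting line is the `torch.addmm` call, which fuses the LoRA merge that the comment underneath spells out, computing the product and the scaled add in a single call. A standalone sketch with made-up shapes (LoRA convention: `A` is `(r, in)`, `B` is `(out, r)`) confirming that `addmm(W, B, A, alpha=s)` equals `W + s * B @ A`:

```python
import torch

out_f, in_f, r, scaling = 8, 16, 4, 0.5
base_weight = torch.randn(out_f, in_f)   # frozen base layer weight
lora_A_weight = torch.randn(r, in_f)     # LoRA down-projection
lora_B_weight = torch.randn(out_f, r)    # LoRA up-projection

# torch.addmm(input, mat1, mat2, alpha=...) returns input + alpha * (mat1 @ mat2)
fused = torch.addmm(base_weight, lora_B_weight, lora_A_weight, alpha=scaling)
explicit = base_weight + scaling * lora_B_weight @ lora_A_weight
assert torch.allclose(fused, explicit, atol=1e-6)
```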
@@ -1017,7 +1015,7 @@ def move_loras_to_device(model, device="cpu" ):
         if ".lora_" in k:
             m.to(device)
 
-def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, verboseLevel = -1):
+def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, verboseLevel = -1):
     """
     quick version of .LoadfromPretrained of the transformers library
     used to build a model and load the corresponding weights (quantized or not)
@@ -1031,6 +1029,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
         raise Exception("full model path to file expected")
 
     model_path = _get_model(model_path)
+
     verboseLevel = _compute_verbose_level(verboseLevel)
 
     with safetensors2.safe_open(model_path) as f:
@@ -1058,11 +1057,13 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
     if "architectures" in transformer_config:
         architectures = transformer_config["architectures"]
         class_name = architectures[0]
-
-        module = __import__("transformers")
-        map = { "T5WithLMHeadModel" : "T5EncoderModel"}
-        class_name = map.get(class_name, class_name)
-        transfomer_class = getattr(module, class_name)
+        if modelClass !=None:
+            transfomer_class = modelClass
+        else:
+            module = __import__("transformers")
+            map = { "T5WithLMHeadModel" : "T5EncoderModel"}
+            class_name = map.get(class_name, class_name)
+            transfomer_class = getattr(module, class_name)
         from transformers import AutoConfig
 
         import tempfile
@@ -1081,8 +1082,11 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
     elif "_class_name" in transformer_config:
         class_name = transformer_config["_class_name"]
 
-        module = __import__("diffusers")
-        transfomer_class = getattr(module, class_name)
+        if modelClass !=None:
+            transfomer_class = modelClass
+        else:
+            module = __import__("diffusers")
+            transfomer_class = getattr(module, class_name)
 
     with init_empty_weights():
         model = transfomer_class.from_config(transformer_config)
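Taken together, the last three hunks add an optional `modelClass` argument that short-circuits class resolution: instead of deriving the class from the config's `architectures` (transformers) or `_class_name` (diffusers) entry via `__import__`, the caller can pass the class object directly. A hedged usage sketch (the checkpoint path is a placeholder, not a file shipped with mmgp):

```python
from transformers import T5EncoderModel
from mmgp import offload

# With modelClass given, the "architectures"/"_class_name" lookup is skipped
# and the supplied class is instantiated from the embedded config instead.
text_encoder = offload.fast_load_transformers_model(
    "ckpts/t5_encoder.quanto.safetensors",  # hypothetical local checkpoint
    modelClass=T5EncoderModel,
)
```

This is useful when the class name stored in the checkpoint's config does not resolve inside `transformers`/`diffusers`, for example for custom model classes defined in the host application.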
@@ -1104,6 +1108,8 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
     """
 
     file_path = _get_model(file_path)
+    if file_path == None:
+        raise Exception("Unable to find file")
     verboseLevel = _compute_verbose_level(verboseLevel)
 
     model = _remove_model_wrapper(model)
@@ -1153,9 +1159,16 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
         _requantize(model, state_dict, quantization_map)
 
     missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
-    if len(missing_keys) > 0 and hasattr(model, "base_model_prefix"):
+    if len(missing_keys) > 0 :
         # if there is a key mismatch maybe we forgot to remove some prefix or we are trying to load just a sub part of a larger model
-        base_model_prefix = model.base_model_prefix + "."
+        if hasattr(model, "base_model_prefix"):
+            base_model_prefix = model.base_model_prefix + "."
+        else:
+            for k,v in state_dict.items():
+                if k.endswith(missing_keys[0]):
+                    base_model_prefix = k[:-len(missing_keys[0])]
+                    break
+
         new_state_dict= {}
         start = -1
         for k,v in state_dict.items():
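Previously a key mismatch was only repaired when the model exposed `base_model_prefix`; now, failing that, the prefix is inferred by matching the first missing key against the checkpoint keys and keeping the extra leading segment. A self-contained sketch of that inference step (illustrative names, not mmgp code):

```python
def infer_prefix(checkpoint_keys, missing_keys):
    first_missing = missing_keys[0]           # e.g. "encoder.block.0.weight"
    for k in checkpoint_keys:
        if k.endswith(first_missing):         # e.g. "model.encoder.block.0.weight"
            return k[: -len(first_missing)]   # -> "model."
    return None

print(infer_prefix(["model.encoder.block.0.weight"], ["encoder.block.0.weight"]))
# model.
```

Note that, as written in the hunk, `base_model_prefix` stays unbound if no checkpoint key ends with the first missing key, so the stripping loop that follows would raise a `NameError` in that case.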
@@ -1521,7 +1534,6 @@ class offload:
         model = self.models[model_id]
         self.active_models.append(model)
         self.active_models_ids.append(model_id)
-
         self.gpu_load_blocks(model_id, None, True)
         for block_name in self.preloaded_blocks_per_model[model_id]:
             self.gpu_load_blocks(model_id, block_name, True)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.2.0
+Version: 3.2.2
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Requires-Dist: peft
 
 
 <p align="center">
-<H2>Memory Management 3.2.0 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.2.1 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -44,6 +44,9 @@ Each profile may use a combination of the following:
 
 ## Sample applications that use mmgp
 It is recommended to have a look at these applications to see how mmgp was implemented in each of them:
+- Wan2GP: https://github.com/deepbeepmeep/Wan2GP :\
+An excellent text to video and image to video generator by Alibaba
+
 - Hunyuan3D-2GP: https://github.com/deepbeepmeep/Hunyuan3D-2GP :\
 A great image to 3D and text to 3D tool by the Tencent team. Thanks to mmgp it can run with less than 6 GB of VRAM
 
6 files without changes