mmgp 3.5.3.tar.gz → 3.5.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mmgp
- Version: 3.5.3
+ Version: 3.5.5
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file


  <p align="center">
- <H2>Memory Management 3.5.3 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.5.5 for the GPU Poor by DeepBeepMeep</H2>
  </p>


@@ -1,6 +1,6 @@

  <p align="center">
- <H2>Memory Management 3.5.3 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.5.5 for the GPU Poor by DeepBeepMeep</H2>
  </p>


@@ -1,6 +1,6 @@
  [project]
  name = "mmgp"
- version = "3.5.3"
+ version = "3.5.5"
  authors = [
  { name = "deepbeepmeep", email = "deepbeepmeep@yahoo.com" },
  ]
@@ -1,4 +1,4 @@
- # ------------------ Memory Management 3.5.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+ # ------------------ Memory Management 3.5.5 for the GPU Poor by DeepBeepMeep (mmgp)------------------
  #
  # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
  # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -642,6 +642,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
  else:
  length = torch.numel(p.data) * p.data.element_size()
  p.data = _move_to_pinned_tensor(p.data, current_big_tensor, offset, length)
+
  tensor_no += 1
  del p
  del dummy_pinned_tensor
@@ -667,7 +668,7 @@ def _welcome():
  if welcome_displayed:
  return
  welcome_displayed = True
- print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.3) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+ print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.5) by DeepBeepMeep ************{ENDC}{UNBOLD}")

  def change_dtype(model, new_dtype, exclude_buffers = False):
  for submodule_name, submodule in model.named_modules():
@@ -1145,6 +1146,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  break
  elif diff_b != None:
  rank = diff_b.shape[0]
+ if not hasattr(module, "bias"):
+ pass
  if module.bias == None:
  msg = f"Lora '{path}': Lora Basis is defined while it doesnt exist in model '{_get_module_name(model)}'. It is likely this Lora has been made for another version of this model."
  fail = True
@@ -1248,6 +1251,7 @@ def sync_models_loras(model, model2):
  model2._loras_scaling = model._loras_scaling

  def unload_loras_from_model(model):
+ if model is None: return
  for _, v in model._loras_model_data.items():
  v.clear()
  for _, v in model._loras_model_shortcuts.items():
@@ -2087,13 +2091,16 @@ class offload:
  if data == None:
  continue
  diff_w , _ , diff_b, alpha = data
+ scaling = self._get_lora_scaling( loras_scaling, model, active_adapter) * alpha
+ if scaling == 0:
+ continue
  if first_weight:
  original_weight= weight.clone() if weight != None else None
  first_weight = False
  if first_bias:
  original_bias= bias.clone() if bias != None else None
  first_bias = False
- scaling = self._get_lora_scaling( loras_scaling, model, active_adapter) * alpha
+
  if diff_w != None:
  weight.add_(diff_w, alpha= scaling)
  diff_w = None
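This hunk and the two that follow share the same change: the adapter's effective scaling is now computed up front, and adapters whose scaling works out to 0 are skipped before the original weight and bias are cloned and before any numerically no-op `add_` calls are issued. A minimal sketch of that pattern, using illustrative names (`merge_adapters`, `adapters`, `get_scaling`) rather than mmgp's actual API:

```python
import torch

def merge_adapters(weight: torch.Tensor, adapters: dict, get_scaling):
    """Fold weight deltas into `weight`, skipping zero-scaled adapters (sketch only)."""
    original_weight = None
    for name, (diff_w, alpha) in adapters.items():
        scaling = get_scaling(name) * alpha
        if scaling == 0:
            continue                              # skip before any cloning or adds
        if original_weight is None:
            original_weight = weight.clone()      # back up the base weight only once
        weight.add_(diff_w, alpha=scaling)        # weight += scaling * diff_w
    return original_weight                        # None if nothing was merged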
@@ -2131,6 +2138,8 @@ class offload:
  continue
  lora_A_weight, lora_B_weight, diff_b, alpha = data
  scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
+ if scaling == 0:
+ continue
  if lora_A_weight != None:
  weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )

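For context on the call this check now guards: `Tensor.addmm_(mat1, mat2, alpha=s)` performs the in-place update `self += s * (mat1 @ mat2)` (with the default `beta=1`), which is how the low-rank product `lora_B @ lora_A` gets folded into the base weight. A standalone illustration with made-up shapes:

```python
import torch

out_features, in_features, rank = 8, 16, 4
weight = torch.zeros(out_features, in_features)
lora_A = torch.randn(rank, in_features)    # low-rank "down" matrix
lora_B = torch.randn(out_features, rank)   # low-rank "up" matrix
scaling = 0.5

# In place: weight = 1 * weight + scaling * (lora_B @ lora_A)
weight.addmm_(lora_B, lora_A, alpha=scaling)

assert torch.allclose(weight, scaling * (lora_B @ lora_A))
```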
@@ -2162,6 +2171,8 @@ class offload:
  lora_A, lora_B, diff_b, alpha = data
  # dropout = self.lora_dropout[active_adapter]
  scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
+ if scaling == 0:
+ continue
  if lora_A == None:
  result.add_(diff_b, alpha=scaling)
  else:
@@ -2193,7 +2204,7 @@ class offload:
  if len(loras_data) == 0:
  return old_forward(*args, **kwargs)
  else:
- # submodule.aaa = submodule_name
+ submodule.aaa = submodule_name
  return self._lora_linear_forward(current_model, submodule, loras_data, *args, **kwargs)
  target_fn = lora_linear_forward
  else:
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mmgp
- Version: 3.5.3
+ Version: 3.5.5
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file


  <p align="center">
- <H2>Memory Management 3.5.3 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.5.5 for the GPU Poor by DeepBeepMeep</H2>
  </p>


File without changes
File without changes
File without changes
File without changes
File without changes
File without changes