mmgp 3.4.7.tar.gz → 3.4.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.4.7
+Version: 3.4.8
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.4.8 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -1,6 +1,6 @@
 
 <p align="center">
-<H2>Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.4.8 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -1,6 +1,6 @@
 [project]
 name = "mmgp"
-version = "3.4.7"
+version = "3.4.8"
 authors = [
   { name = "deepbeepmeep", email = "deepbeepmeep@yahoo.com" },
 ]
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.4.8 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -448,9 +448,9 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
         print(f"'{','.join(names_list)}' was partially pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
     else:
         if len(names_list) > 0:
-            print(f"'{','.join(names_list)}' was pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
-        else:
             print(f"'{','.join(names_list)}' were pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
+        else:
+            print(f"'{','.join(names_list)}' was pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
 
 
     return
@@ -658,7 +658,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.8) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():
@@ -1226,7 +1226,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
 
     model._loras_errors = errors
     if not check_only:
-        if pinnedLora:
+        if pinnedLora and len(pinned_sd_list) > 0:
             _pin_sd_to_memory(pinned_sd_list, pinned_names_list)
         model._loras_adapters = adapters
         if activate_all_loras:
@@ -1407,14 +1407,14 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
     if not (".safetensors" in file or ".sft" in file):
         if pinToMemory:
             raise Exception("Pinning to memory while loading only supported for safe tensors files")
-        state_dict = torch.load(file, weights_only=True)
+        state_dict = torch.load(file, weights_only=True, map_location="cpu")
         if "module" in state_dict:
             state_dict = state_dict["module"]
 
     else:
         basename = os.path.basename(file)
 
-        if "model-0" in basename:
+        if "-of-" in basename:
             metadata = None
             file_parts= basename.split("-")
             parts_max = int(file_parts[-1][:5])
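
Editorial note on this hunk (not part of the diff): passing map_location="cpu" to torch.load keeps non-safetensors checkpoints in CPU RAM instead of restoring them onto whatever device they were saved from, and the "-of-" substring is the marker of Hugging Face-style sharded checkpoint names (for example model-00001-of-00003.safetensors), which the old "model-0" test only caught when the prefix literally started that way. A rough, self-contained sketch of the new shard test, assuming that naming convention; the helper names are illustrative, the real logic lives in load_model_data above:

# Illustrative sketch only, not part of mmgp.
import os

def is_sharded(path: str) -> bool:
    # Hugging Face-style shards look like "<prefix>-00001-of-00003.safetensors"
    return "-of-" in os.path.basename(path)

def shard_count(path: str) -> int:
    # split("-")[-1] yields "00003.safetensors"; the first five characters are the shard total
    return int(os.path.basename(path).split("-")[-1][:5])

assert is_sharded("diffusion_pytorch_model-00001-of-00003.safetensors")
assert not is_sharded("model.safetensors")
assert shard_count("model-00001-of-00002.safetensors") == 2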
@@ -1621,9 +1621,12 @@ class HfHook:
     def __init__(self):
         self.execution_device = "cuda"
 
-    def detach_hook(self, module):
-        pass
+    def init_hook(self, module):
+        return module
 
+    def detach_hook(self, module):
+        return module
+
 last_offload_obj = None
 class offload:
     def __init__(self):
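
The reworked HfHook now resembles the minimal hook shape that Hugging Face accelerate expects, where init_hook and detach_hook both receive the module and hand it back, rather than a detach-only stub. A hedged sketch of that pattern with an illustrative class name:

# Sketch of an accelerate-style no-op hook; NoOpHook is an illustrative name,
# not something defined by mmgp or accelerate.
class NoOpHook:
    def __init__(self, execution_device="cuda"):
        self.execution_device = execution_device

    def init_hook(self, module):
        # called when the hook is attached; the module is returned unchanged
        return module

    def detach_hook(self, module):
        # called when the hook is removed; likewise returns the module
        return module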
@@ -2028,7 +2031,9 @@ class offload:
                 continue
             lora_A_weight, lora_B_weight, diff_b, alpha = data
             scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
-            weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
+            if lora_A_weight != None:
+                weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
+
             if diff_b != None:
                 if bias == None:
                     bias = diff_b.clone()
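
For context: weight.addmm_(lora_B_weight, lora_A_weight, alpha=scaling) folds a LoRA pair into the base weight in place, W += scaling * (B @ A), so the new None check lets bias-only entries (which carry only diff_b) skip the matrix product. A small sketch of that identity with made-up shapes:

# Illustrative check of the in-place merge semantics (shapes are arbitrary).
import torch

out_f, in_f, rank, scaling = 8, 16, 4, 0.5
weight = torch.randn(out_f, in_f)
lora_A = torch.randn(rank, in_f)    # down-projection
lora_B = torch.randn(out_f, rank)   # up-projection

expected = weight + scaling * (lora_B @ lora_A)
weight.addmm_(lora_B, lora_A, alpha=scaling)   # in place: W += scaling * (B @ A)
assert torch.allclose(weight, expected)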
@@ -2059,17 +2064,20 @@ class offload:
             lora_A, lora_B, diff_b, alpha = data
             # dropout = self.lora_dropout[active_adapter]
             scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
-            x = x.to(lora_A.dtype)
-
-            if training:
-                pass
-                # y = lora_A(dropout(x))
+            if lora_A == None:
+                result.add_(diff_b, alpha=scaling)
             else:
-                y = torch.nn.functional.linear(x, lora_A, bias=None)
-                y = torch.nn.functional.linear(y, lora_B, bias=diff_b)
-                y*= scaling
-                result+= y
-                del y
+                x = x.to(lora_A.dtype)
+
+                if training:
+                    pass
+                    # y = lora_A(dropout(x))
+                else:
+                    y = torch.nn.functional.linear(x, lora_A, bias=None)
+                    y = torch.nn.functional.linear(y, lora_B, bias=diff_b)
+                    y*= scaling
+                    result+= y
+                    del y
 
         return result
 
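The runtime (non-merged) LoRA path mirrors the same idea: when lora_A is None the adapter only contributes a scaled bias delta, otherwise the usual low-rank update linear(linear(x, A), B, bias=diff_b) * scaling is added to the base output. A hedged, self-contained sketch of that branch with illustrative names and toy shapes:

# Sketch only; apply_lora is an illustrative helper, not an mmgp function.
import torch
import torch.nn.functional as F

def apply_lora(result, x, lora_A, lora_B, diff_b, scaling):
    if lora_A is None:
        result.add_(diff_b, alpha=scaling)           # bias-only adapter
    else:
        y = F.linear(x.to(lora_A.dtype), lora_A)     # x @ A.T -> (batch, rank)
        y = F.linear(y, lora_B, bias=diff_b)         # -> (batch, out) + diff_b
        result += y * scaling
    return result

x = torch.randn(2, 16)                               # batch 2, in_features 16
base = torch.zeros(2, 8)                             # out_features 8
A, B, b = torch.randn(4, 16), torch.randn(8, 4), torch.randn(8)
apply_lora(base, x, A, B, b, scaling=0.5)            # full adapter
apply_lora(base, x, None, None, torch.randn(8), 0.5) # bias-only adapter
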
@@ -2405,7 +2413,6 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
         model_dtype = getattr(current_model, "_model_dtype", None)
         # if model_dtype == None:
         #     model_dtype = getattr(current_model, "dtype", None)
-
         for _ , m in current_model.named_modules():
             ignore_dtype = hasattr(m, "_lock_dtype")
             for n, p in m.named_parameters(recurse = False):
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.4.7
+Version: 3.4.8
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.4.8 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
6 files without changes