mmgp 3.2.5-py3-none-any.whl → 3.2.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp might be problematic.

mmgp/offload.py CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.2.5 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.2.6 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -61,7 +61,12 @@ import sys
 import os
 import json
 import psutil
-try:
+from accelerate import init_empty_weights
+
+try:
+
+    from peft.tuners.tuners_utils import BaseTuner
+
     from diffusers.utils.peft_utils import set_weights_and_activate_adapters, get_peft_kwargs
 except:
     set_weights_and_activate_adapters = None
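Note: init_empty_weights is now imported unconditionally at module level because the new _inject_adapter hook further down relies on it. As a minimal sketch (not part of the diff) of what that accelerate context manager provides: modules built inside it get their parameters on the "meta" device, so no real memory is allocated until weights are materialised.

    import torch
    from accelerate import init_empty_weights

    with init_empty_weights():
        layer = torch.nn.Linear(4096, 4096)  # skeleton only: the 64 MB fp32 weight is not allocated

    print(layer.weight.device)  # meta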
@@ -297,12 +302,13 @@ def _get_tensor_ref(p):
     return p.data_ptr()


-def _pin_to_memory(model, model_id, partialPinning = False, verboseLevel = 1):
+def _pin_to_memory(model, model_id, partialPinning = False, pinnedLora = True, verboseLevel = 1):
     if partialPinning:
         towers_names, _ = _detect_main_towers(model)


-    BIG_TENSOR_MAX_SIZE = 2**28 # 256 MB
+    # BIG_TENSOR_MAX_SIZE = 2**28 # 256 MB
+    BIG_TENSOR_MAX_SIZE = 2**27 # 128 MB
     current_big_tensor_size = 0
     big_tensor_no = 0
     big_tensors_sizes = []
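For reference, the halved chunk size works out as follows (quick arithmetic check, not part of the diff):

    # pinned "big tensor" buffers are now capped at half the previous size
    assert 2**28 == 268_435_456  # 256 MiB per pinned buffer in 3.2.5
    assert 2**27 == 134_217_728  # 128 MiB per pinned buffer in 3.2.6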
@@ -314,6 +320,9 @@ def _pin_to_memory(model, model_id, partialPinning = False, verboseLevel = 1):
         include = True
         if partialPinning:
             include = any(k.startswith(pre) for pre in towers_names) if partialPinning else True
+        if include and not pinnedLora and ".lora_" in k:
+            include = False
+
         if include:
             params_dict.update( { k + '.' + n : (p, False) for n, p in sub_module.named_parameters(recurse=False) } )
             params_dict.update( { k + '.' + n : (b, True) for n, b in sub_module.named_buffers(recurse=False) } )
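A small illustration (hypothetical key names, not from the diff) of what the new filter excludes when pinnedLora is False: peft-injected LoRA parameters carry ".lora_A" / ".lora_B" in their module path, so they are left out of the pinned buffers.

    def _should_pin(key: str, pinnedLora: bool) -> bool:
        # mirrors the new check: LoRA tensors are skipped unless pinnedLora is set
        return pinnedLora or ".lora_" not in key

    assert _should_pin("blocks.0.attn.to_q.base_layer.weight", pinnedLora=False)
    assert not _should_pin("blocks.0.attn.to_q.lora_A.default.weight", pinnedLora=False)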
@@ -479,7 +488,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.5) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.6) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -800,7 +809,7 @@ def _lora_linear_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor
             scaling = get_scaling(active_adapter)
             lora_A_weight = lora_A.weight
             lora_B_weight = lora_B.weight
-            if new_weights:
+            if new_weights or True:
                 base_weight = torch.addmm(base_weight, lora_B_weight, lora_A_weight, alpha= scaling )
                 # base_weight = base_weight + scaling * lora_B_weight @ lora_A_weight
             else:
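The "or True" added here makes the addmm branch unconditional. torch.addmm(base, B, A, alpha=s) is the fused form of the commented-out expression base + s * (B @ A); a quick standalone check (not part of the diff):

    import torch

    base = torch.randn(8, 8)
    lora_A = torch.randn(4, 8)   # rank-4 down-projection
    lora_B = torch.randn(8, 4)   # rank-4 up-projection
    scaling = 0.5

    fused = torch.addmm(base, lora_B, lora_A, alpha=scaling)
    reference = base + scaling * (lora_B @ lora_A)
    assert torch.allclose(fused, reference, atol=1e-6)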
@@ -857,7 +866,47 @@ def _lora_linear_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor
 
     result = result.to(torch_result_dtype)
     return result
-
+
+def _inject_adapter(
+    self, model: torch.nn.Module, adapter_name: str, autocast_adapter_dtype: bool = True, low_cpu_mem_usage: bool = False
+) -> None:
+
+    def _get_submodules(model, key):
+        parent = model.get_submodule(".".join(key.split(".")[:-1]))
+        target_name = key.split(".")[-1]
+        target = model.get_submodule(key)
+        return parent, target, target_name
+
+    peft_config = self.peft_config[adapter_name]
+    self._check_new_adapter_config(peft_config)
+
+    model_config = self.get_model_config(model)
+
+    peft_config = self._prepare_adapter_config(peft_config, model_config)
+
+    self._prepare_model(peft_config, model)
+
+    target_modules = peft_config.target_modules.copy()
+
+    # unexpected_modules = []
+    for key, target in model.named_modules():
+        if not key:
+            continue
+        if key in target_modules:
+            target_modules.remove(key)
+            self.targeted_module_names.append(key)
+            # pos = key.rfind(".")
+            # parent = key[:pos]
+            # target_name = key[pos+1:]
+            parent, target, target_name = _get_submodules(model, key)
+            with init_empty_weights():
+                self._create_and_replace(peft_config, adapter_name, target, target_name, parent, current_key=key)
+
+    self.set_adapter(self.active_adapters)
+    self._mark_only_adapters_as_trainable(model)
+
+    return target_modules
+
 def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None, preprocess_sd = None, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)
 
@@ -866,6 +915,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
 
     from peft.tuners.lora import Linear
     Linear.forward = _lora_linear_forward
+    BaseTuner.inject_adapter = _inject_adapter
 
     if not isinstance(lora_path, list):
         lora_path = [lora_path]
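Both assignments above are class-level monkey patches; a toy sketch of the pattern (illustrative only, not mmgp code):

    class Greeter:
        def hello(self):
            return "hi"

    def patched_hello(self):
        return "hi (patched)"

    # rebinding the attribute on the class changes behaviour for every instance,
    # which is how mmgp swaps in its own Linear.forward / inject_adapter
    Greeter.hello = patched_hello
    assert Greeter().hello() == "hi (patched)"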
@@ -979,7 +1029,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
         if unexpected_keys:
             raise Exception(f"Lora '{path}' contains invalid keys '{unexpected_keys}'")
-
+
         if verboseLevel >=1:
             print(f"Lora '{path}' was loaded in model '{_get_module_name(model)}'")
         if activate_all_loras:
@@ -1025,7 +1075,6 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
 
 
     import os.path
-    from accelerate import init_empty_weights
 
     if not (model_path.endswith(".sft") or model_path.endswith(".safetensors")):
         raise Exception("full model path to file expected")
@@ -1811,7 +1860,7 @@ class offload:
 
 
 
-    def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, coTenantsMap = None, verboseLevel = -1):
+    def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedLora = False, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, coTenantsMap = None, verboseLevel = -1):
         """Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
         pipe_or_dict_of_modules : the pipeline object or a dictionary of modules of the model
         quantizeTransformer: set True by default will quantize on the fly the video / image model
@@ -2010,7 +2059,7 @@
             if self.verboseLevel >=1:
                 print(f"Model '{model_id}' already pinned to reserved memory")
             else:
-                _pin_to_memory(current_model, model_id, partialPinning= partialPinning, verboseLevel=verboseLevel)
+                _pin_to_memory(current_model, model_id, partialPinning= partialPinning, pinnedLora = pinnedLora, verboseLevel=verboseLevel)
 
             current_budget = model_budgets[model_id]
             cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
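Taken together, the offload.py changes expose an opt-in pinnedLora switch on the all() entry point. A hedged usage sketch (the import path follows the project README and the pipe object is a placeholder, neither comes from this diff):

    from mmgp import offload

    # pipe: an already-constructed diffusers-style pipeline (placeholder)
    offload.all(
        pipe,
        pinnedMemory=True,   # pin model weights into page-locked RAM
        pinnedLora=True,     # new in 3.2.6: also pin LoRA weights (off by default)
    )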
mmgp-3.2.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.2.5
+Version: 3.2.6
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Requires-Dist: peft
 
 
 <p align="center">
-    <H2>Memory Management 3.2.4 for the GPU Poor by DeepBeepMeep</H2>
+    <H2>Memory Management 3.2.6 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
mmgp-3.2.6.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=sN95BJAvdWOu36AWwJlACdxMDiOzeqL2HXLN90oaec4,98169
+mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
+mmgp-3.2.6.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.2.6.dist-info/METADATA,sha256=F7LmNAvBTLEEfFT-Wbh7md4s1U4Vdnt4RrBfuBXpH_s,16151
+mmgp-3.2.6.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
+mmgp-3.2.6.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.2.6.dist-info/RECORD,,
mmgp-3.2.6.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.2)
+Generator: setuptools (76.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
mmgp-3.2.5.dist-info/RECORD DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=XQOTMMp5UQku3byZwDr_dYgD3tK4DNTZkwotVyPg-Lk,96434
-mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
-mmgp-3.2.5.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.2.5.dist-info/METADATA,sha256=s6c1X2ar9DQH1CiLAHdO5X60fuNfKqfmqu-xL_W6j5s,16151
-mmgp-3.2.5.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-mmgp-3.2.5.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.2.5.dist-info/RECORD,,