mmgp 3.5.6__tar.gz → 3.5.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mmgp might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.5.6
+Version: 3.5.7
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.5.6 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.5.7 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -1,6 +1,6 @@
 
 <p align="center">
-<H2>Memory Management 3.5.6 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.5.7 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -1,6 +1,6 @@
 [project]
 name = "mmgp"
-version = "3.5.6"
+version = "3.5.7"
 authors = [
   { name = "deepbeepmeep", email = "deepbeepmeep@yahoo.com" },
 ]
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.5.6 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.5.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -122,8 +122,6 @@ class clock:
     def format_time_gap(self):
         return f"{self.stop_time - self.start_time:.2f}s"
 
-
-
 # useful functions to move a group of tensors (to design custom offload patches)
 def move_tensors(obj, device):
     if torch.is_tensor(obj):
@@ -668,7 +666,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.6) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():
@@ -1295,7 +1293,7 @@ def move_loras_to_device(model, device="cpu" ):
         if ".lora_" in k:
             m.to(device)
 
-def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, defaultConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, modules = None, return_shared_modules = None, configKwargs ={}):
+def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, defaultConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, preprocess_sd = None, modules = None, return_shared_modules = None, configKwargs ={}):
     """
     quick version of .LoadfromPretrained of the transformers library
     used to build a model and load the corresponding weights (quantized or not)
@@ -1383,13 +1381,13 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantiza
 
     model._config = transformer_config
 
-    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, writable_tensors =writable_tensors, modules = modules, return_shared_modules = return_shared_modules, verboseLevel=verboseLevel )
+    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, writable_tensors =writable_tensors, preprocess_sd = preprocess_sd , modules = modules, return_shared_modules = return_shared_modules, verboseLevel=verboseLevel )
 
     return model
 
 
 
-def load_model_data(model, file_path, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, writable_tensors = True, modules = None, return_shared_modules = None, verboseLevel = -1):
+def load_model_data(model, file_path, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, writable_tensors = True, preprocess_sd = None, modules = None, return_shared_modules = None, verboseLevel = -1):
     """
     Load a model, detect if it has been previously quantized using quanto and do the extra setup if necessary
     """
@@ -1506,6 +1504,9 @@ def load_model_data(model, file_path, do_quantize = False, quantizationType = qi
         full_state_dict, full_quantization_map, full_tied_weights_map = None, None, None
 
     # deal if we are trying to load just a sub part of a larger model
+    if preprocess_sd != None:
+        state_dict, quantization_map = preprocess_sd(state_dict, quantization_map)
+
     if modelPrefix != None:
         base_model_prefix = modelPrefix + "."
         state_dict = filter_state_dict(state_dict,base_model_prefix)
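
The new preprocess_sd argument threads from fast_load_transformers_model into load_model_data, where it is called on the raw checkpoint before any modelPrefix filtering. The sketch below is an assumption about how it might be used: the callback name strip_legacy_prefix and the checkpoint path are placeholders, while the contract of taking (state_dict, quantization_map) and returning both is taken from the hunk above.

# Hedged usage sketch, not part of the diff: a preprocess_sd callback that
# rewrites checkpoint keys before mmgp builds the model. The callback name and
# the checkpoint path are hypothetical; the (state_dict, quantization_map)
# in/out contract comes from the hunk above.
from mmgp import offload

def strip_legacy_prefix(state_dict, quantization_map):
    # Hypothetical cleanup: drop a "model." prefix left over from an older checkpoint.
    state_dict = {k.removeprefix("model."): v for k, v in state_dict.items()}
    if quantization_map is not None:
        quantization_map = {k.removeprefix("model."): v for k, v in quantization_map.items()}
    return state_dict, quantization_map

transformer = offload.fast_load_transformers_model(
    "ckpts/transformer.safetensors",   # placeholder path
    preprocess_sd=strip_legacy_prefix,
)
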
@@ -2276,7 +2277,7 @@ class offload:
         setattr(target_module, "forward", functools.update_wrapper(functools.partial(check_empty_cuda_cache, target_module), previous_method) )
 
 
-    def hook_change_module(self, target_module, model, model_id, module_id, previous_method):
+    def hook_change_module(self, target_module, model, model_id, module_id, previous_method, previous_method_name ):
         if hasattr(target_module, "_lock_dtype"):
             dtype = target_module._lock_dtype
         else:
@@ -2289,11 +2290,12 @@ class offload:
             args, kwargs = self.move_args_to_gpu(dtype, *args, **kwargs)
             return previous_method(*args, **kwargs)
 
-        if hasattr(target_module, "_mm_id"):
+        if hasattr(target_module, "_mm_" + previous_method_name):
             return
-        setattr(target_module, "_mm_id", model_id)
+        setattr(target_module, "_mm_Id", model_id)
+        setattr(target_module, "_mm_" + previous_method_name, previous_method)
 
-        setattr(target_module, "forward", functools.update_wrapper(functools.partial(check_change_module, target_module), previous_method) )
+        setattr(target_module, previous_method_name, functools.update_wrapper(functools.partial(check_change_module, target_module), previous_method) )
 
         if not self.verboseLevel >=1:
             return
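
The net effect of these two hunks is that hook_change_module can now wrap any named method, not just forward, and it guards against double patching with a per-method "_mm_<name>" attribute that also preserves the original bound method. Below is a self-contained sketch of that general pattern, illustrative only: hook_method and log_and_call are toy stand-ins, not mmgp's check_change_module.

# Hedged, self-contained illustration of the pattern in the hunk above (not mmgp code):
# a wrapper is installed once per method name, the original bound method is stashed
# under "_mm_<name>", and re-hooking the same method becomes a no-op.
import functools
import torch

def hook_method(module, method_name, wrapper):
    if hasattr(module, "_mm_" + method_name):
        return                                        # already hooked, skip
    original = getattr(module, method_name)
    setattr(module, "_mm_" + method_name, original)   # original stays reachable
    setattr(module, method_name,
            functools.update_wrapper(functools.partial(wrapper, module, original), original))

def log_and_call(module, original, *args, **kwargs):
    print(f"calling {type(module).__name__}")
    return original(*args, **kwargs)

layer = torch.nn.Linear(4, 4)
hook_method(layer, "forward", log_and_call)
hook_method(layer, "forward", log_and_call)   # skipped: _mm_forward already set
layer(torch.randn(2, 4))                      # prints once, then runs the real forward
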
@@ -2661,23 +2663,27 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
                 cur_blocks_prefix, prev_blocks_name, cur_blocks_seq, is_mod_seq = pre, None, num, False
                 cur_blocks_name = submodule_name
                 # print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
-
-
-            if hasattr(submodule, "forward"):
-                # if any_lora and isinstance(submodule, ( torch.nn.Linear, torch.nn.Conv3d, torch.nn.LayerNorm)):
-                if any_lora and hasattr(submodule,"weight"):
+            top_submodule = len(submodule_name.split("."))==1
+            offload_hooks = submodule._offload_hooks if hasattr(submodule, "_offload_hooks") else []
+            if len(offload_hooks) > 0:
+                pass
+            assert top_submodule or len(offload_hooks) == 0, "custom offload hooks can only be set at the of the module"
+            submodule_method_names = ["forward"] + offload_hooks
+            for submodule_method_name in submodule_method_names:
+                if not hasattr(submodule, submodule_method_name ): continue
+                if submodule_method_name == "forward" and any_lora and hasattr(submodule,"weight"):
                     submodule_method = self.hook_lora(submodule, current_model, model_id, loras_model_data, loras_model_shortcuts, submodule_name)
                 else:
-                    submodule_method = getattr(submodule, "forward")
-                if callable(submodule_method):
-                    if len(submodule_name.split("."))==1:
-                        self.hook_change_module(submodule, current_model, model_id, submodule_name, submodule_method)
+                    submodule_method = getattr(submodule, submodule_method_name)
+                if callable(submodule_method):
+                    if top_submodule and cur_blocks_name is None:
+                        self.hook_change_module(submodule, current_model, model_id, submodule_name, submodule_method, submodule_method_name)
                     elif compilationInThisOne and submodule in towers_modules:
                         self.hook_preload_blocks_for_compilation(submodule, model_id, cur_blocks_name, context = submodule_name )
                     else:
                         self.hook_check_empty_cache_needed(submodule, current_model, model_id, cur_blocks_name, submodule_method, context = submodule_name )
-
-            self.add_module_to_blocks(model_id, cur_blocks_name, submodule, prev_blocks_name, submodule_name)
+
+            self.add_module_to_blocks(model_id, cur_blocks_name, submodule, prev_blocks_name, submodule_name)
 
 
         self.tune_preloading(model_id, current_budget, towers_names)
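
The practical upshot of this hunk is a new extension point: a module that sits directly under one of the offloaded models can advertise extra methods for offload.all to patch alongside forward by listing them in an _offload_hooks attribute, and the assert rejects hooks declared on deeper submodules. The sketch below is an assumption about how a model author might use it; the class name TinyVae and the method name encode are hypothetical, only the _offload_hooks attribute and its top level placement come from the hunk.

# Hedged sketch, not mmgp code: a direct child of a model handed to
# offload.all() declares one extra method to be wrapped like forward.
import torch

class TinyVae(torch.nn.Module):
    # Per the hunk above, methods listed here are hooked in addition to forward.
    _offload_hooks = ["encode"]

    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(8, 8)

    def encode(self, x):
        return self.proj(x)

    def forward(self, x):
        return self.encode(x)

With a setup like this, offload.all would wrap both forward and encode with its device management hook instead of forward alone, as was the case before this release.
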
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.5.6
+Version: 3.5.7
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.5.6 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.5.7 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
6 files without changes