mmgp 3.5.12__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

mmgp/offload.py CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.5.12 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.6.0 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -688,7 +688,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.12) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.6.0) by DeepBeepMeep ************{ENDC}{UNBOLD}")

 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():
@@ -2287,9 +2287,10 @@ class offload:
         src = f"""
 def {fname}(module, *args, **kwargs):
     _ = __TYPE_CONST # anchor type as a constant to make code object unique per class
+    nada = "{fname}"
     mgr = module._mm_manager
     mgr._pre_check(module)
-    return module._mm_forward(*args, **kwargs)
+    return module._mm_forward(*args, **kwargs) #{fname}
 """
         ns = {"__TYPE_CONST": mod_cls}
         exec(src, ns) # compile a new function object/code object for this class
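
The hunk above touches the dynamically generated per-class forward wrapper: the generated source now embeds the class-specific wrapper name both as a string literal and as a trailing comment, so every wrapped class gets source text, and therefore a function/code object, of its own. Below is a minimal sketch of this exec-based pattern; `MiniManager`, `make_wrapper` and `_pre_check` are illustrative stand-ins, not mmgp's real internals.

```python
# Sketch: generate one wrapper function object per class via exec().
# MiniManager / make_wrapper are hypothetical names, not mmgp's API.
import torch

class MiniManager:
    def _pre_check(self, module):
        # placeholder for "make sure this module's weights are on the GPU"
        print(f"pre-check for {type(module).__name__}")

def make_wrapper(mod_cls):
    fname = f"wrapped_forward_{mod_cls.__name__}"
    src = f'''
def {fname}(module, *args, **kwargs):
    _ = __TYPE_CONST          # anchor the class as a constant
    tag = "{fname}"           # class-specific literal keeps the source text unique
    module._mm_manager._pre_check(module)
    return module._mm_forward(*args, **kwargs)  # {fname}
'''
    ns = {"__TYPE_CONST": mod_cls}
    exec(src, ns)             # compiles a distinct function object for this class
    return ns[fname]

lin = torch.nn.Linear(4, 4)
lin._mm_manager = MiniManager()
lin._mm_forward = lin.forward
wrapper = make_wrapper(torch.nn.Linear)
print(wrapper(lin, torch.randn(2, 4)).shape)  # torch.Size([2, 4])
```

As in the hunk, each call to `make_wrapper` compiles a separate function object; the diff itself does not state why the extra class-specific literal was added in 3.6.0.
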
@@ -2310,7 +2311,8 @@ def {fname}(module, *args, **kwargs):
         wrapper_fn = self._get_wrapper_for_type(type(target_module))

         # bind as a bound method (no partial/closures)
-        target_module.forward = types.MethodType(wrapper_fn, target_module)
+        # target_module.forward = types.MethodType(wrapper_fn, target_module)
+        target_module.forward = functools.update_wrapper(functools.partial(wrapper_fn, target_module), previous_method)

     def hook_check_load_into_GPU_if_needed_default(self, target_module, model, model_id, blocks_name, previous_method, context):

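The binding style changes in this hunk: the wrapper used to be attached as a bound method via types.MethodType, and is now attached as a functools.partial with the module pre-bound, while functools.update_wrapper copies the original forward's metadata (__name__, __doc__, __wrapped__, ...) onto the new callable. A small self-contained sketch of the two styles, using a toy class as a stand-in for a torch submodule:

```python
# Sketch contrasting the two binding styles shown before/after this change.
# ToyModule and wrapper_fn are illustrative only.
import types
import functools

class ToyModule:
    def forward(self, x):
        """Original forward."""
        return x + 1

def wrapper_fn(module, *args, **kwargs):
    print("pre-check runs here")
    return module._mm_forward(*args, **kwargs)

m = ToyModule()
m._mm_forward = m.forward            # keep a handle on the original forward
previous_method = m.forward

# Old style: a real bound method attached to the instance.
m.forward = types.MethodType(wrapper_fn, m)
print(m.forward(3))                  # 4

# New style: partial with the module pre-filled; update_wrapper copies the
# original forward's metadata so introspection still reports "forward".
m.forward = functools.update_wrapper(functools.partial(wrapper_fn, m), previous_method)
print(m.forward(3), m.forward.__name__)   # 4 forward
```

One observable difference is that the partial keeps the original method's name and docstring visible to callers that introspect it; the diff itself does not state the motivation for the switch.
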
@@ -2345,12 +2347,12 @@ def {fname}(module, *args, **kwargs):
         if isinstance(target_module, torch.nn.Linear):
             def check_load_into_GPU_needed_linear(module, *args, **kwargs):
                 check_load_into_GPU_needed()
-                return previous_method(*args, **kwargs)
+                return previous_method(*args, **kwargs) # linear
             check_load_into_GPU_needed_module = check_load_into_GPU_needed_linear
         else:
             def check_load_into_GPU_needed_other(module, *args, **kwargs):
                 check_load_into_GPU_needed()
-                return previous_method(*args, **kwargs)
+                return previous_method(*args, **kwargs) # other
             check_load_into_GPU_needed_module = check_load_into_GPU_needed_other

         setattr(target_module, "_mm_id", model_id)
@@ -2498,7 +2500,7 @@ def {fname}(module, *args, **kwargs):



-    def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, partialPinning = False, loras = None, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, convertWeightsFloatTo = torch.bfloat16, perc_reserved_mem_max = 0, coTenantsMap = None, vram_safety_coefficient = 0.8, verboseLevel = -1):
+    def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, partialPinning = False, loras = None, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, convertWeightsFloatTo = torch.bfloat16, perc_reserved_mem_max = 0, coTenantsMap = None, vram_safety_coefficient = 0.8, compile_mode ="default", verboseLevel = -1):
         """Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
         pipe_or_dict_of_modules : the pipeline object or a dictionary of modules of the model
         quantizeTransformer: set True by default will quantize on the fly the video / image model
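
Version 3.6.0 adds a compile_mode argument (default "default") to this signature; it is forwarded to torch.compile(..., mode=compile_mode) further down in the diff. A hedged usage sketch follows: the helper function, the pipeline object and the chosen values are illustrative, and the import path is assumed from the package layout rather than taken from this diff.

```python
# Illustrative only: argument values are example choices, not recommendations.
from mmgp import offload

def setup_offload(pipe):
    """Attach mmgp offloading to an already-built pipeline (hypothetical helper)."""
    offload.all(
        pipe,
        pinnedMemory=True,            # pin weights in reserved RAM for faster transfers
        quantizeTransformer=True,     # quantize the main transformer on the fly
        compile=True,                 # torch.compile the "tower" submodules
        compile_mode="max-autotune",  # new in 3.6.0, forwarded to torch.compile(mode=...)
        verboseLevel=1,
    )
```
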
@@ -2771,8 +2773,8 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
                 elif compilationInThisOne and submodule in towers_modules:
                     self.hook_preload_blocks_for_compilation(submodule, model_id, cur_blocks_name, context = submodule_name )
                 else:
-                    if compilationInThisOne and False:
-                        self.hook_check_load_into_GPU_needed(submodule, current_model, model_id, cur_blocks_name, submodule_method, context = submodule_name )
+                    if compilationInThisOne: #and False
+                        self.hook_check_load_into_GPU_if_needed(submodule, current_model, model_id, cur_blocks_name, submodule_method, context = submodule_name )
                     else:
                         self.hook_check_load_into_GPU_if_needed_default(submodule, current_model, model_id, cur_blocks_name, submodule_method, context = submodule_name )

@@ -2789,7 +2791,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
                 print(f"Pytorch compilation of model '{model_id}' is not yet supported.")

             for submodel in towers_modules:
-                submodel.forward= torch.compile(submodel.forward, backend= "inductor", mode="default" ) # , fullgraph= True, mode= "reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs",
+                submodel.forward= torch.compile(submodel.forward, backend= "inductor", mode= compile_mode) # , fullgraph= True, mode= "reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs",
                 #dynamic=True,

             self.tune_preloading(model_id, current_budget, towers_names)
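
The compiled towers now honour the caller-supplied compile_mode instead of the hard-coded "default". The mode strings named in the trailing comment ("reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs") are the standard torch.compile options. A stand-alone sketch of the same call shape; the Sequential below is only a toy stand-in for one of the compiled submodels.

```python
# Toy stand-in for a "tower" submodel; shows the torch.compile call shape only.
import torch

tower = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.GELU())

compile_mode = "default"  # or "reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs"
tower.forward = torch.compile(tower.forward, backend="inductor", mode=compile_mode)

print(tower(torch.randn(8, 64)).shape)  # torch.Size([8, 64])
```

As in the hunk, the compiled callable replaces the module's forward attribute, so subsequent calls to the module run the inductor-compiled version.
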
mmgp-3.6.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.5.12
+Version: 3.6.0
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file


 <p align="center">
-<H2>Memory Management 3.5.12 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.6.0 for the GPU Poor by DeepBeepMeep</H2>
 </p>


mmgp-3.6.0.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=UaqWLw8jrNs9ibbIWplbLO5Cym84Txuu4lzttgxmnXs,132411
+mmgp/safetensors2.py,sha256=zYNMprt1KoxgVALbcz6DawxsQDNNRImvgO9cYRChUiY,19028
+mmgp-3.6.0.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.6.0.dist-info/METADATA,sha256=epm8_KuIB_c4W9iB31KIbHtNjdVuLyvW-DZoc8RR434,16309
+mmgp-3.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mmgp-3.6.0.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.6.0.dist-info/RECORD,,
mmgp-3.5.12.dist-info/RECORD DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=qUr0KW3eGtkNLc2eZvgz8roK2jFh9T-KpPe6icSin7I,132211
-mmgp/safetensors2.py,sha256=zYNMprt1KoxgVALbcz6DawxsQDNNRImvgO9cYRChUiY,19028
-mmgp-3.5.12.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.5.12.dist-info/METADATA,sha256=zbOHAwD5QciOmKHWdHt9zpMO3KtIyYadeVytReJ52lo,16311
-mmgp-3.5.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mmgp-3.5.12.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.5.12.dist-info/RECORD,,