mmgp 3.5.12__tar.gz → 3.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of mmgp has been flagged as potentially problematic; consult the package registry's advisory page for details.
- {mmgp-3.5.12/src/mmgp.egg-info → mmgp-3.6.0}/PKG-INFO +2 -2
- {mmgp-3.5.12 → mmgp-3.6.0}/README.md +1 -1
- {mmgp-3.5.12 → mmgp-3.6.0}/pyproject.toml +1 -1
- {mmgp-3.5.12 → mmgp-3.6.0}/src/mmgp/offload.py +12 -10
- {mmgp-3.5.12 → mmgp-3.6.0/src/mmgp.egg-info}/PKG-INFO +2 -2
- {mmgp-3.5.12 → mmgp-3.6.0}/LICENSE.md +0 -0
- {mmgp-3.5.12 → mmgp-3.6.0}/setup.cfg +0 -0
- {mmgp-3.5.12 → mmgp-3.6.0}/src/__init__.py +0 -0
- {mmgp-3.5.12 → mmgp-3.6.0}/src/mmgp/__init__.py +0 -0
- {mmgp-3.5.12 → mmgp-3.6.0}/src/mmgp/safetensors2.py +0 -0
- {mmgp-3.5.12 → mmgp-3.6.0}/src/mmgp.egg-info/SOURCES.txt +0 -0
- {mmgp-3.5.12 → mmgp-3.6.0}/src/mmgp.egg-info/dependency_links.txt +0 -0
- {mmgp-3.5.12 → mmgp-3.6.0}/src/mmgp.egg-info/requires.txt +0 -0
- {mmgp-3.5.12 → mmgp-3.6.0}/src/mmgp.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mmgp
|
|
3
|
-
Version: 3.
|
|
3
|
+
Version: 3.6.0
|
|
4
4
|
Summary: Memory Management for the GPU Poor
|
|
5
5
|
Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -15,7 +15,7 @@ Dynamic: license-file
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
<p align="center">
|
|
18
|
-
<H2>Memory Management 3.
|
|
18
|
+
<H2>Memory Management 3.6.0 for the GPU Poor by DeepBeepMeep</H2>
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
21
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# ------------------ Memory Management 3.
|
|
1
|
+
# ------------------ Memory Management 3.6.0 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
2
2
|
#
|
|
3
3
|
# This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
|
|
4
4
|
# This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
|
|
@@ -688,7 +688,7 @@ def _welcome():
|
|
|
688
688
|
if welcome_displayed:
|
|
689
689
|
return
|
|
690
690
|
welcome_displayed = True
|
|
691
|
-
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.
|
|
691
|
+
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.6.0) by DeepBeepMeep ************{ENDC}{UNBOLD}")
|
|
692
692
|
|
|
693
693
|
def change_dtype(model, new_dtype, exclude_buffers = False):
|
|
694
694
|
for submodule_name, submodule in model.named_modules():
|
|
@@ -2287,9 +2287,10 @@ class offload:
|
|
|
2287
2287
|
src = f"""
|
|
2288
2288
|
def {fname}(module, *args, **kwargs):
|
|
2289
2289
|
_ = __TYPE_CONST # anchor type as a constant to make code object unique per class
|
|
2290
|
+
nada = "{fname}"
|
|
2290
2291
|
mgr = module._mm_manager
|
|
2291
2292
|
mgr._pre_check(module)
|
|
2292
|
-
return module._mm_forward(*args, **kwargs)
|
|
2293
|
+
return module._mm_forward(*args, **kwargs) #{fname}
|
|
2293
2294
|
"""
|
|
2294
2295
|
ns = {"__TYPE_CONST": mod_cls}
|
|
2295
2296
|
exec(src, ns) # compile a new function object/code object for this class
|
|
@@ -2310,7 +2311,8 @@ def {fname}(module, *args, **kwargs):
|
|
|
2310
2311
|
wrapper_fn = self._get_wrapper_for_type(type(target_module))
|
|
2311
2312
|
|
|
2312
2313
|
# bind as a bound method (no partial/closures)
|
|
2313
|
-
target_module.forward = types.MethodType(wrapper_fn, target_module)
|
|
2314
|
+
# target_module.forward = types.MethodType(wrapper_fn, target_module)
|
|
2315
|
+
target_module.forward = functools.update_wrapper(functools.partial(wrapper_fn, target_module), previous_method)
|
|
2314
2316
|
|
|
2315
2317
|
def hook_check_load_into_GPU_if_needed_default(self, target_module, model, model_id, blocks_name, previous_method, context):
|
|
2316
2318
|
|
|
@@ -2345,12 +2347,12 @@ def {fname}(module, *args, **kwargs):
|
|
|
2345
2347
|
if isinstance(target_module, torch.nn.Linear):
|
|
2346
2348
|
def check_load_into_GPU_needed_linear(module, *args, **kwargs):
|
|
2347
2349
|
check_load_into_GPU_needed()
|
|
2348
|
-
return previous_method(*args, **kwargs)
|
|
2350
|
+
return previous_method(*args, **kwargs) # linear
|
|
2349
2351
|
check_load_into_GPU_needed_module = check_load_into_GPU_needed_linear
|
|
2350
2352
|
else:
|
|
2351
2353
|
def check_load_into_GPU_needed_other(module, *args, **kwargs):
|
|
2352
2354
|
check_load_into_GPU_needed()
|
|
2353
|
-
return previous_method(*args, **kwargs)
|
|
2355
|
+
return previous_method(*args, **kwargs) # other
|
|
2354
2356
|
check_load_into_GPU_needed_module = check_load_into_GPU_needed_other
|
|
2355
2357
|
|
|
2356
2358
|
setattr(target_module, "_mm_id", model_id)
|
|
@@ -2498,7 +2500,7 @@ def {fname}(module, *args, **kwargs):
|
|
|
2498
2500
|
|
|
2499
2501
|
|
|
2500
2502
|
|
|
2501
|
-
def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, partialPinning = False, loras = None, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, convertWeightsFloatTo = torch.bfloat16, perc_reserved_mem_max = 0, coTenantsMap = None, vram_safety_coefficient = 0.8, verboseLevel = -1):
|
|
2503
|
+
def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, partialPinning = False, loras = None, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, convertWeightsFloatTo = torch.bfloat16, perc_reserved_mem_max = 0, coTenantsMap = None, vram_safety_coefficient = 0.8, compile_mode ="default", verboseLevel = -1):
|
|
2502
2504
|
"""Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
|
|
2503
2505
|
pipe_or_dict_of_modules : the pipeline object or a dictionary of modules of the model
|
|
2504
2506
|
quantizeTransformer: set True by default will quantize on the fly the video / image model
|
|
@@ -2771,8 +2773,8 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
|
|
|
2771
2773
|
elif compilationInThisOne and submodule in towers_modules:
|
|
2772
2774
|
self.hook_preload_blocks_for_compilation(submodule, model_id, cur_blocks_name, context = submodule_name )
|
|
2773
2775
|
else:
|
|
2774
|
-
if compilationInThisOne and False
|
|
2775
|
-
self.
|
|
2776
|
+
if compilationInThisOne: #and False
|
|
2777
|
+
self.hook_check_load_into_GPU_if_needed(submodule, current_model, model_id, cur_blocks_name, submodule_method, context = submodule_name )
|
|
2776
2778
|
else:
|
|
2777
2779
|
self.hook_check_load_into_GPU_if_needed_default(submodule, current_model, model_id, cur_blocks_name, submodule_method, context = submodule_name )
|
|
2778
2780
|
|
|
@@ -2789,7 +2791,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
|
|
|
2789
2791
|
print(f"Pytorch compilation of model '{model_id}' is not yet supported.")
|
|
2790
2792
|
|
|
2791
2793
|
for submodel in towers_modules:
|
|
2792
|
-
submodel.forward= torch.compile(submodel.forward, backend= "inductor", mode=
|
|
2794
|
+
submodel.forward= torch.compile(submodel.forward, backend= "inductor", mode= compile_mode) # , fullgraph= True, mode= "reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs",
|
|
2793
2795
|
#dynamic=True,
|
|
2794
2796
|
|
|
2795
2797
|
self.tune_preloading(model_id, current_budget, towers_names)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mmgp
|
|
3
|
-
Version: 3.
|
|
3
|
+
Version: 3.6.0
|
|
4
4
|
Summary: Memory Management for the GPU Poor
|
|
5
5
|
Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -15,7 +15,7 @@ Dynamic: license-file
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
<p align="center">
|
|
18
|
-
<H2>Memory Management 3.
|
|
18
|
+
<H2>Memory Management 3.6.0 for the GPU Poor by DeepBeepMeep</H2>
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
21
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|